MAIN FUNCTION
This is the function we actually call to run the script. Because there is no code beyond this chunk, I am leaving its printed output in place. Apart from a look at the last page, you can consider this the end of the document!
# Our main function: given a depth, it gathers data from that many
# search pages plus the last page and returns a list of data frames
# Main driver. Starting at page 1 of the newest R-tagged question listing,
# repeatedly calls search_read() to scrape the question/answer/comment tables
# for each search page, following pagination until search_read() signals that
# the final page has been consumed, then row-binds the per-page results.
#
# Args:
#   n_depth: number of search pages (plus the last page) to gather; forwarded
#            to search_read(), which controls when pagination stops.
#
# Returns:
#   A list of three data frames: questions, answers, comments (also reachable
#   by position [[1]], [[2]], [[3]] as before).
#
# Side effects (kept deliberately for compatibility with the rest of the
# document, which inspects these globals): assigns search_data,
# questions_list, answers_list, comments_list, q_df, a_df, c_df into the
# global environment, and prints kable tables of the three data frames.
# Depends on base_url and search_read() defined earlier in the document.
scrape_so <- function(n_depth) {
  input_url <- "https://stackoverflow.com/questions/tagged/r?tab=newest&page=1&pagesize=50"
  questions_list <- list()
  answers_list <- list()
  comments_list <- list()
  d <- 1
  repeat {
    # search_read() returns list(questions_df, answers_df, comments_df,
    # next_url); slot 4 is a relative URL, or TRUE once the last page is done.
    search_data <<- search_read(input_url, n_depth)
    questions_list[[d]] <- search_data[[1]]
    answers_list[[d]] <- search_data[[2]]
    comments_list[[d]] <- search_data[[3]]
    next_url <- search_data[[4]]
    d <- d + 1
    # The original tested `next_url == TRUE`, which only worked via the
    # implicit coercion "TRUE" == TRUE, and then still pasted/printed a
    # nonsense URL ("https://stackoverflow.comTRUE"). isTRUE() makes the
    # sentinel check explicit, and breaking here skips the bogus URL.
    if (isTRUE(next_url)) break
    input_url <- paste0(base_url, next_url)
    print(paste("INPUT URL:", input_url))
  }
  # Export the accumulated per-page lists globally (other chunks use them).
  questions_list <<- questions_list
  answers_list <<- answers_list
  comments_list <<- comments_list
  # Stack the per-page data frames into one table per entity type.
  q_df <<- do.call(rbind, questions_list)
  a_df <<- do.call(rbind, answers_list)
  c_df <<- do.call(rbind, comments_list)
  print(kable(q_df))
  print(kable(a_df))
  print(kable(c_df))
  list(questions = q_df, answers = a_df, comments = c_df)
}
# Run the scrape to a depth of 3 search pages (plus the last page).
final_dataframes <- scrape_so(3)
## [1] "Current URL: https://stackoverflow.com/questions/76398208/why-monthly-mean-raster-output-seem-to-get-multiplied-by-10000"
## [1] "Current URL: https://stackoverflow.com/questions/76398196/analyze-an-entire-sheet-and-display-the-results-in-a-table"
## [1] "Current URL: https://stackoverflow.com/questions/76397984/is-there-an-r-function-that-would-produce-results-similar-to-scipy-s-gaussian-fi"
## [1] "Current URL: https://stackoverflow.com/questions/76397812/seurat-findclusters-seems-to-freeze-after-one-iteration"
## [1] "Current URL: https://stackoverflow.com/questions/76397805/r-matching-a-matrix-to-a-dataframe"
## [1] "Current URL: https://stackoverflow.com/questions/76397627/ggplot-reorder-within-order-month"
## [1] "Current URL: https://stackoverflow.com/questions/76397512/ggplot-ordering-legends-with-guides-changes-continuous-legend-to-discrete"
## [1] "Current URL: https://stackoverflow.com/questions/76397435/how-can-i-put-the-iterable-numbers-into-a-new-column-each-iteration"
## [1] "Current URL: https://stackoverflow.com/questions/76397278/how-to-add-multiple-column-headers-that-span-specified-columns-to-a-dt-table-ren"
## [1] "Current URL: https://stackoverflow.com/questions/76397124/python-or-r-packages-to-draw-2d-diagrams-of-intermolecular-interactions"
## [1] "Current URL: https://stackoverflow.com/questions/76396915/aggregating-model-summaries-from-list-of-models"
## [1] "Current URL: https://stackoverflow.com/questions/76396811/using-call-for-c-pkolmogorov2x-in-r-package-submitted-to-cran"
## [1] "Current URL: https://stackoverflow.com/questions/76396563/how-to-run-a-geographically-weighted-logistic-regression-in-r"
## [1] "Current URL: https://stackoverflow.com/questions/76396425/how-to-fix-the-subscript-out-of-bounds-error-when-converting-nc-to-text-in-r-u"
## [1] "Current URL: https://stackoverflow.com/questions/76396351/how-to-stitch-merge-two-rasterstack-map-overlapping-each-other"
## [1] "Current URL: https://stackoverflow.com/questions/76396349/is-there-a-r-function-to-make-pairwise-correlation-tests-between-2-different-sub"
## [1] "Current URL: https://stackoverflow.com/questions/76396323/spgwr-error-new-data-matrix-rows-mismatch"
## [1] "Current URL: https://stackoverflow.com/questions/76396291/error-encountered-when-trying-to-plot-individual-trees-from-cforest-forest-usi"
## [1] "Current URL: https://stackoverflow.com/questions/76396233/error-in-the-mapnames-operator-is-invalid-for-atomic-vectors"
## [1] "Current URL: https://stackoverflow.com/questions/76396165/how-to-create-a-spatial-gridlines-using-latitude-and-longitude-in-r"
## [1] "Current URL: https://stackoverflow.com/questions/76395825/efficient-way-to-change-the-class-of-several-matrices-in-r"
## [1] "Current URL: https://stackoverflow.com/questions/76395807/error-in-st-kriging-of-uneven-spacetime-data-the-leading-minor-of-order-2-is-n"
## [1] "Current URL: https://stackoverflow.com/questions/76395682/how-to-merge-two-data-frames-by-first-4-digits-of-mergeable-values-only"
## [1] "Current URL: https://stackoverflow.com/questions/76395579/error-publishing-flexdashboard-as-shiny-app-in-r"
## [1] "Current URL: https://stackoverflow.com/questions/76395522/how-to-reshape-data-from-long-to-wide-format-in-r"
## [1] "Current URL: https://stackoverflow.com/questions/76395332/extract-iteratively-data-in-subfolders-within-folder-into-googledrive"
## [1] "Current URL: https://stackoverflow.com/questions/76395254/r-efficient-way-to-apply-a-match-function-on-every-element-of-vector"
## [1] "Current URL: https://stackoverflow.com/questions/76395142/including-the-title-page-and-back-page-in-a-quarto-book-in-pdf-format"
## [1] "Current URL: https://stackoverflow.com/questions/76395098/dotplot-of-enrichgo-results-with-all-of-the-ontology-terms-on-same-plot-for-comp"
## [1] "Current URL: https://stackoverflow.com/questions/76394809/create-a-variable-filtering-for-all-rows-which-are-based-on-a-numerical-value-wi"
## [1] "Current URL: https://stackoverflow.com/questions/76394793/prior-posterior-predictive-distributions-for-an-unconstrained-constrained-ppo-mo"
## [1] "Current URL: https://stackoverflow.com/questions/76394787/how-to-label-levels-in-r"
## [1] "Current URL: https://stackoverflow.com/questions/76394767/is-there-a-way-to-add-an-external-term-to-the-bekk-garch-process-in-r"
## [1] "Current URL: https://stackoverflow.com/questions/76394709/reshaping-data-frame-with-mutiple-value-columns-from-wide-to-long"
## [1] "Current URL: https://stackoverflow.com/questions/76394485/grouping-data-with-condition-in-r"
## [1] "Current URL: https://stackoverflow.com/questions/76394461/r-comparing-distance-calculations"
## [1] "Current URL: https://stackoverflow.com/questions/76394455/function-with-shiny-survey"
## [1] "Current URL: https://stackoverflow.com/questions/76394451/why-is-sub-in-r-not-recognizing-the-pattern-that-i-provided-in-r"
## [1] "Current URL: https://stackoverflow.com/questions/76394390/how-to-replace-a-string-defined-by-starting-and-ending-index-by-another-string-i"
## [1] "Current URL: https://stackoverflow.com/questions/76394371/create-columns-based-on-the-values-in-another-data-frame"
## [1] "Current URL: https://stackoverflow.com/questions/76394359/simulating-a-spatio-temporal-gaussian-process"
## [1] "Current URL: https://stackoverflow.com/questions/76394250/negative-gvif1-2df-values"
## [1] "Current URL: https://stackoverflow.com/questions/76394080/single-season-occupancy-modeling-in-unmarked-how-to-create-a-model-averaged-pred"
## [1] "Current URL: https://stackoverflow.com/questions/76394018/node-inconsistent-with-parents"
## [1] "Current URL: https://stackoverflow.com/questions/76393934/how-do-add-sign-of-rows-in-one-column-to-rows-in-new-column-in-r"
## [1] "Current URL: https://stackoverflow.com/questions/76393814/complete-missing-non-overlapping-date-ranges"
## [1] "Current URL: https://stackoverflow.com/questions/76393742/using-partykit-to-predict-survival-probabilities"
## [1] "Current URL: https://stackoverflow.com/questions/76393686/how-to-set-external-variable-or-program-for-rstudio"
## [1] "Current URL: https://stackoverflow.com/questions/76393636/interpretation-of-roc-curve-curving-early"
## [1] "Current URL: https://stackoverflow.com/questions/76393577/how-can-i-find-the-length-of-row-wise-set-differences-between-two-list-columns-u"
## [1] "INPUT URL: https://stackoverflow.com/questions/tagged/r?tab=newest&page=2&pagesize=50"
## [1] "Current URL: https://stackoverflow.com/questions/76393401/switching-columns-within-a-geom-col-plot-in-r"
## [1] "Current URL: https://stackoverflow.com/questions/76393378/how-to-replace-multiple-occurrences-of-a-pattern-in-r"
## [1] "Current URL: https://stackoverflow.com/questions/76393250/bslibvalue-box-within-bs4dash-displays-not-as-intended"
## [1] "Current URL: https://stackoverflow.com/questions/76393244/how-can-i-disregard-the-theme-for-1-slide-in-xaringan"
## [1] "Current URL: https://stackoverflow.com/questions/76393217/how-to-plot-multiple-plots-in-r-for-different-variables"
## [1] "Current URL: https://stackoverflow.com/questions/76393207/rselenium-timeout-error-while-trying-to-connect-to-server-running-on-docker-usi"
## [1] "Current URL: https://stackoverflow.com/questions/76392914/removing-unwanted-special-etc-and-html-a8-etc-characters-in-r"
## [1] "Current URL: https://stackoverflow.com/questions/76392902/how-to-launch-second-shiny-modal-based-on-an-event-in-the-main-shiny-app-ui-and"
## [1] "Current URL: https://stackoverflow.com/questions/76392893/how-to-format-negative-numbers-with-parenthesis-with-openxlsx-in-r"
## [1] "Current URL: https://stackoverflow.com/questions/76392884/complex-conditional-df-subsetting-with-nested-for-loops-in-r"
## [1] "Current URL: https://stackoverflow.com/questions/76392840/how-can-i-conditionally-assign-values-to-a-column-in-data-table-using-a-function"
## [1] "Current URL: https://stackoverflow.com/questions/76392784/how-do-i-automatically-plot-overlapping-curves-with-ggplot2"
## [1] "Current URL: https://stackoverflow.com/questions/76392725/setting-dynamic-yaxis-labels-ggplot"
## [1] "Current URL: https://stackoverflow.com/questions/76392709/create-a-custom-legend-for-facet-grid-variables"
## [1] "Current URL: https://stackoverflow.com/questions/76392631/r-using-group-by-for-all-values"
## [1] "Current URL: https://stackoverflow.com/questions/76392610/how-do-i-fix-errors-when-renaming-observations-in-r-for-filtering-purposes"
## [1] "Current URL: https://stackoverflow.com/questions/76392410/how-can-i-make-movies-from-the-movies-dataset-with-multiple-genres-only-have-1"
## [1] "Current URL: https://stackoverflow.com/questions/76392409/how-can-i-add-the-overall-histogram-in-a-grouped-histogram-plot"
## [1] "Current URL: https://stackoverflow.com/questions/76392386/how-do-i-add-a-var-constraint-to-portfolioanalytics-optimization-function-in-r"
## [1] "Current URL: https://stackoverflow.com/questions/76392367/write-to-google-sheet-skipping-1st-row"
## [1] "Current URL: https://stackoverflow.com/questions/76392347/mixed-model-ancova-when-variables-are-not-of-the-same-length"
## [1] "Current URL: https://stackoverflow.com/questions/76392326/rmarkdown-how-to-use-results-asis-and-fig-show-hold-together"
## [1] "Current URL: https://stackoverflow.com/questions/76392259/how-do-i-plot-my-data-through-in-discrete-time-bins"
## [1] "Current URL: https://stackoverflow.com/questions/76392228/lmer-gives-estimates-for-non-existing-variables-after-equation-with-interaction"
## [1] "Current URL: https://stackoverflow.com/questions/76392177/how-to-assign-unique-observations-to-values-within-a-group"
## [1] "Current URL: https://stackoverflow.com/questions/76392149/linear-programming-problem-r-using-lpsolveapi"
## [1] "Current URL: https://stackoverflow.com/questions/76392055/how-to-match-multiple-occurrences-of-strings-given-a-start-and-end-pattern-in-r"
## [1] "Current URL: https://stackoverflow.com/questions/76391960/r-studio-add-row-with-dynamic-field-name"
## [1] "Current URL: https://stackoverflow.com/questions/76391906/how-to-pass-an-unquoted-column-name-to-a-custom-summary-function-min-in-r"
## [1] "Current URL: https://stackoverflow.com/questions/76391856/r-argument-is-not-a-character-vector-enc2utf8path"
## [1] "Current URL: https://stackoverflow.com/questions/76391852/recreate-hist-binning-in-ggplot2-with-geom-histogram"
## [1] "Current URL: https://stackoverflow.com/questions/76391835/format-graph-ggcuminc-cumulative-incidence"
## [1] "Current URL: https://stackoverflow.com/questions/76391830/three-way-interaction-in-regression-does-not-show-the-interaction-terms-and-esti"
## [1] "Current URL: https://stackoverflow.com/questions/76391751/if-match-between-column-id-in-two-different-datasets-then-create-a-new-dataset"
## [1] "Current URL: https://stackoverflow.com/questions/76391676/r-how-to-mutate-strings-into-icons-within-one-cell"
## [1] "Current URL: https://stackoverflow.com/questions/76391552/how-to-extract-a-string-that-starts-and-ends-with-a-specific-pattern-in-r"
## [1] "Current URL: https://stackoverflow.com/questions/76391482/in-ggplot2-print-an-expression-in-the-facet-wrap"
## [1] "Current URL: https://stackoverflow.com/questions/76391426/what-directory-do-i-use-to-install-libraries-for-a-container-image-with-r-for-an"
## [1] "Current URL: https://stackoverflow.com/questions/76391151/how-program-to-join-2-different-tables-based-on-which-one-has-the-highest-number"
## [1] "Current URL: https://stackoverflow.com/questions/76391100/r-add-current-row-value-of-adjacent-column-with-previous-row-value-of-current"
## [1] "Current URL: https://stackoverflow.com/questions/76391093/r-analysis-of-number-of-mature-animals"
## [1] "Current URL: https://stackoverflow.com/questions/76390879/how-can-i-edit-shiny-servers-sockjs-websocket-to-add-content-type-headers-to-it"
## [1] "Current URL: https://stackoverflow.com/questions/76390817/is-it-possible-to-conduct-anova-with-a-frequency-variable"
## [1] "Current URL: https://stackoverflow.com/questions/76390798/undefined-columns-selected-when-subsetting-in-r"
## [1] "Current URL: https://stackoverflow.com/questions/76390501/is-there-a-way-to-untar-a-file-and-merge-all-the-extracted-files-into-one-for-ef"
## [1] "Current URL: https://stackoverflow.com/questions/76390478/import-decimal-interval-of-months-with-lubridate-to-convert-into-days"
## [1] "Current URL: https://stackoverflow.com/questions/76390426/how-to-change-the-y-axis-to-scientific-annotation-problem-with-characters"
## [1] "Current URL: https://stackoverflow.com/questions/76390374/web-crawling-with-naver-client-id-why-is-my-id-invalid-and-how-can-i-fix-it"
## [1] "Current URL: https://stackoverflow.com/questions/76390329/change-x-or-y-position-of-density-plot"
## [1] "Current URL: https://stackoverflow.com/questions/76390299/r-tapply-how-to-use-index-names-as-a-fun-additional-argument"
## [1] "INPUT URL: https://stackoverflow.com/questions/tagged/r?tab=newest&page=3&pagesize=50"
## [1] "Current URL: https://stackoverflow.com/questions/76390196/error-in-seq-defaultfrom-1-to-total-window-1-by-absstep"
## [1] "Current URL: https://stackoverflow.com/questions/76389748/how-do-i-calculate-the-omega-values-for-my-bifactor-model"
## [1] "Current URL: https://stackoverflow.com/questions/76389710/how-do-i-apply-conditional-statemtents-to-create-a-new-raster-layer-based-on-3-g"
## [1] "Current URL: https://stackoverflow.com/questions/76389697/clean-data-in-r-from-image"
## [1] "Current URL: https://stackoverflow.com/questions/76389613/r-append-matrix-rows-if-condition-is-met"
## [1] "Current URL: https://stackoverflow.com/questions/76389515/retrieve-every-value-between-an-alphanumeric-range-in-r-using-ifelse"
## [1] "Current URL: https://stackoverflow.com/questions/76389510/add-a-column-with-a-string-value-based-on-other-column-values-r"
## [1] "Current URL: https://stackoverflow.com/questions/76389468/how-to-subtract-values-between-months-for-each-group-and-each-year-separately-in"
## [1] "Current URL: https://stackoverflow.com/questions/76389386/r-function-that-summarize-rows-grouped-by-but-disregards-duplicate-strings-and"
## [1] "Current URL: https://stackoverflow.com/questions/76389243/is-it-possible-to-bypass-the-9-argument-limit-in-commandargs-for-r-programs"
## [1] "Current URL: https://stackoverflow.com/questions/76389056/how-to-achieve-k-anonymity-in-r-changing-python-to-r-function"
## [1] "Current URL: https://stackoverflow.com/questions/76389050/cleaning-data-in-r-by-using-a-reference-date"
## [1] "Current URL: https://stackoverflow.com/questions/76389028/is-there-a-way-to-add-section-labels-to-the-x-axis-of-a-plot-using-ggplot2"
## [1] "Current URL: https://stackoverflow.com/questions/76388858/change-the-size-and-orientation-of-legend-title-while-plotting-raster"
## [1] "Current URL: https://stackoverflow.com/questions/76388339/r-data-table-lost-rows-after-order"
## [1] "Current URL: https://stackoverflow.com/questions/76388303/how-can-i-get-the-primary-key-of-a-selected-option-from-a-dataframe-based-select"
## [1] "Current URL: https://stackoverflow.com/questions/76388156/customise-and-order-legends-in-survival-analysis-in-r"
## [1] "Current URL: https://stackoverflow.com/questions/76388032/convert-data-frame-with-character-column-to-data-frame-with-integer-column"
## [1] "Current URL: https://stackoverflow.com/questions/76387964/h2o-mean-residual-deviance-for-poisson-family"
## [1] "Current URL: https://stackoverflow.com/questions/76387937/rearranging-a-dataframe-creation-of-new-columns-and-pivot-to-wide-format-based"
## [1] "Current URL: https://stackoverflow.com/questions/76387472/tidyrpivot-longer-with-duplicate-problems-with-no-apparent-duplicate-column"
## [1] "Current URL: https://stackoverflow.com/questions/76387441/obtaining-predicted-values-of-the-outcome-variable-using-margins-command"
## [1] "Current URL: https://stackoverflow.com/questions/76387313/expectation-step-in-gaussian-mixture-model-for-matrix-data-not-producing-proper"
## [1] "Current URL: https://stackoverflow.com/questions/76387295/error-in-installing-glmnet-4-1-7-in-r-4-2-3-in-rhel-7-9"
## [1] "Current URL: https://stackoverflow.com/questions/76387189/fatal-error-relating-to-include-s-h-when-installing-r-scalop-package"
## [1] "Current URL: https://stackoverflow.com/questions/76387115/ggplot2-dodge-overlapping-when-y-axis-is-not-count-frequency"
## [1] "Current URL: https://stackoverflow.com/questions/76387096/why-are-the-fitting-results-of-the-arima-and-glm-function-different"
## [1] "Current URL: https://stackoverflow.com/questions/76386943/incompatible-shapes-1024-3-vs-1024-1024-in-neural-network-training"
## [1] "Current URL: https://stackoverflow.com/questions/76386904/how-to-merge-columns-and-rows-in-r-so-that-i-can-calculate-the-average-number-o"
## [1] "Current URL: https://stackoverflow.com/questions/76386871/how-do-i-automatically-set-a-custom-path-for-r-packages-when-using-ms-visual-stu"
## [1] "Current URL: https://stackoverflow.com/questions/76386843/how-to-use-dplyr-verbs-rename-and-mutate-in-a-same-chunk"
## [1] "Current URL: https://stackoverflow.com/questions/76386840/why-is-remove-punct-not-removing-apostrophes-when-tokenizing-a-corpus-in-quanted"
## [1] "Current URL: https://stackoverflow.com/questions/76386786/after-stat-and-glue-do-not-work-together"
## [1] "Current URL: https://stackoverflow.com/questions/76386732/how-do-i-stop-highlighted-r-code-runs-twice"
## [1] "Current URL: https://stackoverflow.com/questions/76386620/fisher-exact-using-tidyr-or-dplyr-approach"
## [1] "Current URL: https://stackoverflow.com/questions/76386429/how-can-i-download-pdfs-from-a-website-that-stores-them-on-aws-using-rvest-in-r"
## [1] "Current URL: https://stackoverflow.com/questions/76386409/smoothing-splines-is-there-a-way-to-automatically-add-s-to-each-column-name"
## [1] "Current URL: https://stackoverflow.com/questions/76386402/compatibility-issue-between-dplyr-versions-in-rstudio-error-with-mutate-functio"
## [1] "Current URL: https://stackoverflow.com/questions/76386356/how-can-i-optimize-this-pattern-matching-function-to-handle-large-data-sets-in-r"
## [1] "Current URL: https://stackoverflow.com/questions/76386347/how-can-i-convert-a-data-table-to-its-logarithm-1-base-2-form-in-r-while-ign"
## [1] "Current URL: https://stackoverflow.com/questions/76386335/how-do-i-collapse-data-frame-rows-if-values-are-identical-in-r"
## [1] "Current URL: https://stackoverflow.com/questions/76386256/export-a-plotly-graph-in-r-to-powerpoint"
## [1] "Current URL: https://stackoverflow.com/questions/76386162/is-there-a-way-to-merge-two-mutated-dataframes-together"
## [1] "Current URL: https://stackoverflow.com/questions/76386109/how-can-i-control-for-multiple-categorical-variables-in-my-regression-analysis-o"
## [1] "Current URL: https://stackoverflow.com/questions/76386104/why-are-the-bars-inverted-on-the-x-axis-when-using-100-stacked-bar-plots-with-g"
## [1] "Current URL: https://stackoverflow.com/questions/76386043/why-do-i-have-different-p-values-if-calculated-differently"
## [1] "Current URL: https://stackoverflow.com/questions/76386018/trying-to-create-a-date-column-for-tibble-want-to-take-the-values-from-the-inde"
## [1] "Current URL: https://stackoverflow.com/questions/76385903/randomly-selecting-50-columns-in-r-data-table-results-in-table-with-only-50-rows"
## [1] "Current URL: https://stackoverflow.com/questions/76385735/linearly-interpolating-values-in-a-large-r-data-frame-grouped-by-date-using-tidy"
## [1] "Current URL: https://stackoverflow.com/questions/76385712/what-is-causing-my-ggplot2-boxplot-to-turn-into-skinny-lines-instead-of-wide-box"
## [1] "INPUT URL: https://stackoverflow.com/questions/tagged/r?tab=newest&page=9817&pagesize=50"
## Warning in get_reputation(doc): Reputation scores do not match! Nrep: 7 OldRep:
## 4
## [1] "Current URL: https://stackoverflow.com/questions/255697/is-there-an-r-package-for-learning-a-dirichlet-prior-from-counts-data"
## [1] "Current URL: https://stackoverflow.com/questions/127137/sample-code-for-r"
## [1] "Current URL: https://stackoverflow.com/questions/103312/how-to-test-for-the-eof-flag-in-r"
## [1] "Current URL: https://stackoverflow.com/questions/102056/how-to-search-for-r-materials"
## [1] "Current URL: https://stackoverflow.com/questions/95007/explain-the-quantile-function-in-r"
## [1] "Current URL: https://stackoverflow.com/questions/79709/worse-sin-side-effects-or-passing-massive-objects"
## [1] "Current URL: https://stackoverflow.com/questions/77434/how-to-access-the-last-value-in-a-vector"
## [1] "INPUT URL: https://stackoverflow.comTRUE"
##
##
## | id|views |votes |tags |question-time |OwnerUsername | ownerId|reputation |body |badgeGold |badgeSilver |badgeBronze |Editor |EditTime |
## |--------:|:-----|:-----|:-----------------------------------------------------------------------------|:--------------------|:---------------------------|--------:|:----------|:-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:---------|:-----------|:-----------|:-----------------|:------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
## | 76398208|14 |0 |r,mean,raster |2023-06-03 22:06:34Z |Michael_Brun | 21621075|17 |I have various .tiff files (3 files per month (e.g. 20060103.tif, 20060113.tif, 20060124.tif, 20070203.tif. 20070213.tif, 20070224.tif...etc). I am calculating monthly mean using following code in R. But the problem is that the cell values of tiff files is in the range 0-1 but the final output of the code has cell values in the range 0-10000. I also checked it manually. For example, a cell whose monthly mean output should be .1265 has monthly mean as 1265. I don't understand that in which step, the values are getting multiplied by 10000 and how?
## setwd("D:/Data")
## library(raster)
## rast <- list.files(path="D:/Data/",pattern='.*tif',full.names=TRUE)
## rast[1:519]
## s <- stack(rast)
## names(s)[1:519]
## grp = substr(names(s),2,7)
## grp[1:519]
## alb_mean <- stackApply(s, grp, mean)
## names(alb_mean)[1:519]
## for(n in names(alb_mean)){writeRaster(alb_mean[[n]], paste0(n,".tif"))}
## list.files(path=".",pattern=".tif") |0 |0 |5 |NA |NA |
## | 76398196|11 |-1 |r |2023-06-03 22:00:45Z |Rossy | 20504490|1 |How can I analyze a complete excel sheet and show the result in a table, classify them by area and by average population and on a map using R, in addition to creating a visualization, the population of a country is analyzed.
## compare columns and totals |0 |0 |0 |NA |NA |
## | 76397984|29 |0 |python,r,scipy,gaussian |2023-06-03 20:46:58Z |JStorey | 11631725|13 |Iâm working to recreate some Python code in R. How can I replicate the results of gaussian_filter() from the SciPy package in R (here)? Iâve tried a number of different functions from a number of different packages, and even after ensuring the parameters of these functions match those in the Python function and testing with very basic matrixes theyâre producing different results. I canât find something that is as simple or obvious as whatâs offered in Python - am I missing something? If not, what approach should I take to recreating that function in R? Thanks very much!
## Here's an example of what I'm trying to achieve in Python:
## gaussian_filter([[1.0,1.0,1.0],[2.0,2.0,2.0],[3.0,3.0,3.0]], sigma=2)
##
## array([[1.88842725, 1.88842725, 1.88842725],
## [2. , 2. , 2. ],
## [2.11157275, 2.11157275, 2.11157275]])
##
## This is the R function that I most expected to work, but the output doesn't match the output from the Python function:
## mmand::gaussianSmooth(matrix(rep(1:3, 3), ncol = 3), 2)
##
## [,1] [,2] [,3]
## [1,] 1.841918 1.841918 1.841918
## [2,] 2.000000 2.000000 2.000000
## [3,] 2.158082 2.158082 2.158082 |0 |0 |4 |JStorey |2023-06-03 22:40:34Z |
## | 76397812|24 |1 |r,multiprocessing,hpc,seurat |2023-06-03 19:52:33Z |user15141432 | 15141432|31 |I am trying to run FindClusters() on a dataset of about 20G, 300K cells using the following command on a RedHat Linux HPC:
## df <- FindClusters(df, resolution=seq(0.01,1,by=0.1), verbose = TRUE,algorithm=1)
## When running, I get the following output in my file, however, the code then seems to freeze after the following input.
## 0% 10 20 30 40 50 60 70 80 90 100%
## [----|----|----|----|----|----|----|----|----|----|
## ****************************************************
##
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 311049
## Number of edges: 5724294
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9925
## Number of communities: 2834
## Elapsed time: 146 seconds
##
## After this message, the program continues to run but doesn't seem to progress forward. Only after about 8 hours, does the next output for the next resolution value print. The program does work on a smaller dataset much faster (2G and about 4 min), and completes without any error, however, I don't believe that a 20G dataset should take this long.
## These are the RedHat HPC system settings, and I am submitting my job via the following qsub command: qsub multiCore.sh
## NAME="Springdale Linux"
## VERSION="7.9 (Verona)"
## ID="rhel"
## ID_LIKE="fedora"
## VERSION_ID="7.9"
##
## cat multiCore.sh
## #!/bin/bash
## #$ -N MULTICORE
## #$ -cwd -S /bin/bash
## #$ -l mem=50G,time=6::
## #$ -pe orte 4
## #$ -o output.log
## #$ -e error.log
##
##
## # Run the MPI job with mpirun
## mpirun $HOME/bin/Rscript integrate2.R // integrate2.R is the file where FindClusters() is called
##
## So far, I have tried looking through the Seurat documentation for parallel processing with future and running one resolution value at a time, but the issue is still there.
## Any suggestions would be appreciated. |0 |0 |5 |user15141432 |2023-06-03 21:30:09Z |
## | 76397805|35 |0 |r,matrix |2023-06-03 19:50:57Z |stats_noob | 13203841|5,117 |I am working with the R Programming Language.
## I have the following data frames:
## set.seed(123)
##
## df_1 <- data.frame(
## name_1 = c("john", "david", "alex", "kevin", "trevor", "xavier", "tom", "michael", "troy", "kelly", "chris", "henry", "taylor", "ryan", "peter"),
## lon = rnorm(15, mean = -74.0060, sd = 0.01),
## lat = rnorm(15, mean = 40.7128, sd = 0.01)
## )
##
## df_2 <- data.frame(
## name_2 = c("matthew", "tyler", "sebastian", "julie", "anna", "tim", "david", "nigel", "sarah", "steph", "sylvia", "boris", "theo", "malcolm"),
## lon = rnorm(14, mean = -74.0060, sd = 0.01),
## lat = rnorm(14, mean = 40.7128, sd = 0.01)
## )
##
## My Problem: I want to find out the distance (e.g. haversine distance) between all people from df_1 and df_2 and then perform some summary statistics (e.g. for each person in df_1 - what is the distance to the closest person df_2, furthest person in df_2, average person in df_2, etc.)
## I think I know how to calculate the distance between all pairs of people:
## library(geosphere)
##
## haversine_distance <- function(lon1, lat1, lon2, lat2) {
## distHaversine(c(lon1, lat1), c(lon2, lat2))
## }
##
##
## distances <- matrix(nrow = nrow(df_1), ncol = nrow(df_2))
## # calculate the distances
## for (i in 1:nrow(df_1)) {
## for (j in 1:nrow(df_2)) {
## distances[i, j] <- haversine_distance(df_1$lon[i], df_1$lat[i], df_2$lon[j], df_2$lat[j])
## }
## }
##
## rownames(distances) <- df_1$name_1
## colnames(distances) <- df_2$name_2
##
## The distance matrix looks like this:
## matthew tyler sebastian julie anna tim david nigel sarah steph sylvia boris theo malcolm
## john 1052.8789 3247.1522 2729.5524 2786.8618 1617.3737 2333.5141 1948.9521 2064.2697 2048.1926 489.7949 2243.3701 423.1875 3760.5821 2663.365
## david 965.2928 1805.2741 1380.9300 1423.8518 941.3102 1009.7809 715.3872 602.9369 605.3148 977.5747 895.8301 1134.0782 2440.0083 2026.501
## alex 3660.8024 1824.7070 1828.6055 1765.7750 3120.1155 2221.2910 2612.6494 2554.4279 2657.5071 4057.3500 2715.1428 4154.0464 2428.5429 2886.329
## kevin 638.9259 2054.2783 1412.4940 1467.9787 639.6901 1017.4015 644.2898 820.1698 887.2768 834.5504 1217.3939 937.2919 2746.7912 1775.239
## trevor 1887.7327 807.6626 650.9167 631.9494 1511.9586 640.8622 884.1570 520.1834 603.2698 2094.4664 747.7145 2232.6381 1679.2678 2085.071
## xavier 2756.8628 1697.5262 1013.2502 972.9730 2190.7641 1396.8569 1767.1953 1893.9403 2051.9317 3237.6654 2239.1811 3302.8378 2571.7172 1878.935
## tom 1587.6118 1192.5724 420.2596 447.8639 1151.8284 243.6729 530.4778 488.9710 675.8181 1903.4973 975.1733 2011.5490 2077.7307 1695.655
## michael 2867.3117 825.7050 1950.4505 1912.7913 2672.4188 1954.2652 2093.8370 1504.5642 1361.1719 2768.1872 1012.5247 2967.4984 581.9209 3407.516
## troy 2351.8427 549.4500 1383.4185 1352.5158 2107.4240 1365.1343 1513.8307 941.2245 828.5550 2349.1859 560.1101 2531.9797 1035.0599 2818.647
## kelly 2169.1191 568.8260 1158.1408 1130.9239 1894.6183 1131.2607 1291.3101 738.7638 658.7015 2219.9931 491.6430 2392.3424 1239.3253 2584.679
## chris 3291.8547 1427.3840 1455.8430 1389.1659 2766.7982 1841.1226 2232.4323 2141.2351 2239.5804 3660.7231 2294.9747 3764.9145 2106.7084 2651.150
## henry 415.8956 2251.7187 1453.0914 1516.5279 394.8684 1062.2968 670.9220 1027.9476 1129.8791 859.7761 1481.0100 894.5358 2989.7708 1552.172
## taylor 1174.1631 1537.5152 746.6206 799.1515 782.6833 358.4041 170.5171 439.8022 635.0847 1504.8199 1016.1560 1601.9382 2359.4940 1567.225
## ryan 2625.2224 342.8375 1052.6759 989.0322 2217.8837 1271.4270 1593.4782 1243.7473 1268.9269 2821.0950 1222.0676 2967.2978 1248.0710 2587.342
## peter 830.2758 2655.1120 2213.5778 2265.3747 1276.1912 1821.6048 1455.0204 1487.1033 1458.7784 195.4891 1651.2295 414.2948 3176.7362 2416.527
##
## My Question: I am struggling to convert this matrix into a dataframe. I am trying different approaches.
## Here are the codes I am using for the summary statistics:
## # Approach 1
##
##
## final <- data.frame(
## name_1 = rep(df_1$name_1, each = nrow(df_2)),
## lon_1 = rep(df_1$lon, each = nrow(df_2)),
## lat_1 = rep(df_1$lat, each = nrow(df_2)),
## name_2 = rep(df_2$name_2, nrow(df_1)),
## lon_2 = rep(df_2$lon, nrow(df_1)),
## lat_2 = rep(df_2$lat, nrow(df_1)),
## distance = c(distances)
## )
##
## final_summary <- aggregate(distance ~ name_1,
## data = final,
## FUN = function(x) c(min = min(x),
## max = max(x),
## mean = mean(x),
## median = median(x),
## sd = sd(x)))
## final_summary_1 <- do.call(data.frame, final_summary)
## names(final_summary_1)[-(1)] <- c("min_distance", "max_distance", "mean_distance", "median_distance", "sd_distance")
##
## # Approach 2
##
## final_summary_2 <- data.frame(name_1 = df_1$name_1,
## min_distance = apply(distances, 1, min),
## max_distance = apply(distances, 1, max),
## mean_distance = apply(distances, 1, mean),
## median_distance = apply(distances, 1, median),
## sd_distance = apply(distances, 1, sd))
##
##
## #Approach 3 (https://stackoverflow.com/a/76394618/13203841 - optimized for speed)
##
## haversine_distance <- function(lon1, lat1, lon2, lat2) {
## distHaversine(cbind(lon1, lat1), cbind(lon2, lat2))
## }
##
## grid <- expand.grid(i = seq_len(nrow(df_1)), j = seq_len(nrow(df_2)))
##
## #create master data frame with the pairwise information name & locations
## workingdf <- data.frame(
## name_1 = df_1$name_1[grid$i],
## lon_1 = df_1$lon[grid$i],
## lat_1 = df_1$lat[grid$i],
## name_2 = df_2$name_2[grid$j],
## lon_2 = df_2$lon[grid$j],
## lat_2 = df_2$lat[grid$j]
## )
##
## #calculate the distances for every row, taking the columns as input
## workingdf$distance <- distHaversine(workingdf[ ,c("lon_1", "lat_1")], workingdf[ ,c("lon_2", "lat_2")])
##
## #summarize
## final_summary_3 <- aggregate(
## distance ~ name_1,
## data = workingdf,
## FUN = function(x) c(min = min(x), max = max(x), mean = mean(x), median = median(x), sd = sd(x))
## )
##
## When I analyze the results for "alex" (approach_1, approach_2, approach_3):
## name_1 min_distance max_distance mean_distance median_distance sd_distance
## 1 alex 342.8375 2729.552 1416.779 1397.956 716.7869
##
## name_1 min_distance max_distance mean_distance median_distance sd_distance
## alex alex 1765.775 4154.046 2749.092 2635.078 774.061
##
## name_1 distance.min distance.max distance.mean distance.median distance.sd
## 1 alex 1765.775 4154.046 2749.092 2635.078 774.061
##
## And when I then compare it to the values of the original matrix:
## distances["alex", ]
## matthew tyler sebastian julie anna tim david nigel sarah steph sylvia boris theo malcolm
## 3660.802 1824.707 1828.605 1765.775 3120.115 2221.291 2612.649 2554.428 2657.507 4057.350 2715.143 4154.046 2428.543 2886.329
##
## I notice that:
## Approach 2 and Approach 3 produce the same results
## The values for Approach 2 and Approach 3 seem to be correct
## The values in Approach 1 (e.g. alex min distance 342 meters) do not even appear in the original matrix
## Thus - am I correct to conclude that Approach 1 is incorrect while Approach 2 and Approach 3 are both correct?
## Thanks! |3 |21 |72 |NA |2023-06-03 21:21:50Z |
## | 76397627|24 |1 |r,ggplot2 |2023-06-03 19:02:49Z |Guilherme Lopes de Campos | 13794490|19 |I am plotting a bar graph. However, the axes x show in different month order. Using the command reoder_within, doesn't work for the purpose.
## Follow below the ME.
## ggplot(de, aes(fill=Cidade, y = Leitura , x = Mes ))+geom_bar(position='dodge', stat='identity')
##
## Generate the follow plot:
## Plot
## My purpose is modify the axes x to: Jan, Fev, March .....
## The set of data is:
## Cidade Mes Leitura
## <chr> <chr> <dbl>
## 1 Petrolina Janeiro 74.2
## 2 Petrolina Fevereiro 73.2
## 3 Petrolina Março 68.7
## 4 Petrolina Abril 42.9
## 5 Petrolina Maio 9.84
## 6 Petrolina Junho 8.02 |0 |0 |3 |Phil |2023-06-03 21:06:20Z, 2023-06-03 19:14:47Z |
## | 76397512|21 |0 |r,ggplot2,legend |2023-06-03 18:36:23Z |pcantalupo | 2992891|2,204 |I'm running ggplot2 v3.4.1. I created this 2 legend plot that by default it is placing the year2 size legend below the cty color legend. However, I would like the size legend to be on top.
## library(tidyverse)
##
## mpg$year2 = factor(mpg$year)
## values = c(2,4); names(values) = c("1999", "2008")
## p = mpg %>%
## ggplot(aes(x = cty, y = hwy, color = cty, size = year2)) +
## geom_point() +
## scale_size_manual(name = "year2", values = values)
## p
##
##
## Therefore, I used guides() to specify the legend ordering but it changes the continuous color legend cty to discrete
## p + guides(size = guide_legend(order = 1),
## color = guide_legend(order = 2))
##
##
## I saw this post ggplot guide_legend argument changes continuous legend to discrete but am unable to figure out how to use guide_colorbar() when you have 2 or more legends.
## How do I change my code to keep the cty legend as continuous? Thx |0 |17 |27 |NA |NA |
## | 76397435|21 |1 |r |2023-06-03 18:20:59Z |Lara | 15804062|115 |# Creating an empty dataframe
## result_df <- data.frame()
##
## # Iterating over the numbers 1 to 3
## for (i in 1:3) {
## # Create a new column and populate it with values of i
## result_df[, paste0("Column", i)] <- i
## }
##
## I expect to have the following populated dataframe:
## Column1 Column2 Column3
## 1 1 2 3 |0 |0 |7 |NA |NA |
## | 76397278|19 |0 |css,r,json,shiny,dt |2023-06-03 17:45:44Z |Curious Jorge - user9788072 | 9788072|2,276 |I'm looking for examples of how to add multiple column headers to a table rendered with DT table in R Shiny but the examples of how to do this can be convoluted. Mostly adding JS and CSS but it can be difficult to pick out what are the necessary items to include in your code to add the column headers to the table.
## So let's start with this super-simple code example below. How to add multiple column headers, following the illustration below where I added mock headers in blue to show what I'm trying to add for the sake of example? Black font is fine, I only used blue to delineate what I added using Paint versus what was rendered by Shiny. Boxes around the headers are helpful.
##
## Code:
## library(shiny)
## library(DT)
##
## ui <- fluidPage(DTOutput('tbl'))
##
## server <- function(input, output) {
## output$tbl = renderDT(iris, options = list(lengthChange = FALSE))
## }
##
## shinyApp(ui,server) |0 |9 |20 |NA |NA |
## | 76397124|33 |1 |python,r,diagram,interaction,docking |2023-06-03 17:07:18Z |ginn | 20290723|95 |I've docked a large inorganic compound (receptor) with an organic molecule that isn't a protein (ligand). I wanted to see the interaction between them in a 2D diagram, but BIOVIA Discovery Studio (DS) doesn't work well with pdb files converted from pdbqt. And since the receptor isn't a protein or a fragment of DNA/RNA, DS won't show the diagrams as well (the same with PLIP and Proteins Plus). I've also tried LigPlot, LeView and PoseView, but none of them worked. Does anyone know another software that draws 2D diagrams, and, if not, would there be any way to do this in R??? (The language I have most affinity and experience in)
## @M__ kindly suggested using Python, in which I don't have any experience yet. So... Would anyone have, perhaps, any ideia of a package I could use to draw 2D diagrams of intermolecular interactions?
## I thank you in advance.
## DATA
## ⢠Sample PDB file (the receptor and ligand's file extension):
## HEADER EXAMPLE PROTEIN 03-JUN-2023 PDB ID: 1XYZ
## TITLE CRYSTAL STRUCTURE OF EXAMPLE PROTEIN
## COMPND EXAMPLE PROTEIN
## AUTHOR J. Smith, A. Johnson
## REMARK THIS IS AN EXAMPLE REMARK
## REMARK THIS IS ANOTHER EXAMPLE REMARK
## SEQRES 1 A 100 ALA SER GLY GLN LEU CYS LEU GLY THR ...
## HETATM 1 N ALA A 1 2.365 4.665 -0.486 1.00 20.00 N
## HETATM 2 CA ALA A 1 1.348 5.580 -0.791 1.00 20.00 C
## HETATM 3 C ALA A 1 0.081 4.867 -0.397 1.00 20.00 C
## ...
## ATOM 399 N LEU A 50 -2.883 -1.380 -0.738 1.00 20.00 N
## ATOM 400 CA LEU A 50 -4.093 -0.617 -0.419 1.00 20.00 C
## ATOM 401 C LEU A 50 -5.337 -1.508 -0.714 1.00 20.00 C
## ...
## CONECT 1 2
## CONECT 2 1 3
## CONECT 3 2 4
## ...
## END
##
## ⢠(A PDBQT file is a PDB file processed and read by AutoDock... That's why other softwares usually don't work well with these files.)
## ⢠Sample 2D diagram of intermolecular interactions:
##
## Source: MARIN, Ruxandra-Cristina et al. Management of antiretroviral therapy with boosted protease inhibitorsâDarunavir/ritonavir or darunavir/cobicistat. Biomedicines, v. 9, n. 3, p. 313, 2021. |0 |0 |5 |ginn |2023-06-03 18:10:09Z |
## | 76396915|28 |0 |r,list,apply,plyr,sapply |2023-06-03 16:18:38Z |bison2178 | 3135514|713 |I am running same models several times on different subset of data, for example,
## data("mtcars")
## head("mtcars")
##
## table(mtcars$cyl, useNA = "ifany")
##
## I am fitting the model (mpg ~ hp + wt) for each cylinder type.
## foo <- mtcars %>%
## group_by(cyl) %>%
## nest() %>%
## mutate(model= map(data, rms::ols(mpg ~ hp + wt, data = .)))
##
## When I try this
## foo %>%
## {map(.$model, summary)}
##
## I get the model summary from all three models but this does not include the estimates for intercept.
## I am not interested in fitting the model using lm function, I am specifically interested in solutions involving fitting rms::ols() function.
## Any help on aggregating the summaries from multiple models using rms::ols() and that includes estimates for intercept, will be very helpful. Thanks. |0 |8 |21 |bison2178 |2023-06-03 17:27:34Z |
## | 76396811|25 |-1 |r,package,cran,p-value |2023-06-03 15:51:22Z |errenay | 22011971|1 |I'm preparing the package in R language for the CRAN submission. However, I've to calculate the p-value for the one-sample Kolmogorov-Smirnow test using the external call to the function C_pKolmogorov2x from the stats package, i.e.
## 1- .Call(stats:::C_pKolmogorov2x,PACKAGE = "stats", criticalValue, n1)
##
## During checking and building my package it causes warnings:
## Foreign function call to a base package:
## .Call(stats:::C_pKolmogorov2x, ..., PACKAGE = "stats")
## Packages should not make .C/.Call/.External/.Fortran calls to a base
## package. They are not part of the API, for use only by R itself and
## subject to change without notice.
##
## Because I've got a final value of the test statistic, to obtain the p-value I can't use ks.test, which can be applied only if we have a whole sample.
## Does anybody know the other way (e.g., function from other package to use directly) to calculate this p-value using cdf for the K-S one-sample test? Or maybe the warning will not cause rejecting my package from CRAN? Thank you in advance for your help!
## I've tried to found other packages intended to calculate the above-mentioned p-value. However, without success. |0 |0 |0 |errenay |2023-06-03 17:32:20Z |
## | 76396563|31 |0 |r,polygon,logistic-regression,gwr |2023-06-03 14:44:43Z |Saïd Maanan | 4889002|511 |I have the following data set containing socioeconomic variables:
## > glimpse(df)
## Rows: 730,099
## Columns: 9
## $ id <int> 25500, 25501, 25502, 25503, 25504, 25505, 25506, 25507, 25508, 25509, 25510, 25511, 25512, 25513, 25514, 25515, 25516, 255â¦
## $ Prov <fct> Al Hoceïma, Al Hoceïma, Al Hoceïma, Al Hoceïma, Al Hoceïma, Al Hoceïma, Al Hoceïma, Al Hoceïma, Al Hoceïma, Al Hoceïma, Alâ¦
## $ Age <dbl> 45, 15, 65, 55, 35, 75, 45, 25, 55, 40, 35, 50, 70, 20, 30, 75, 50, 35, 50, 40, 35, 70, 70, 35, 35, 75, 55, 35, 25, 50, 30â¦
## $ Edu <dbl> 5, 4, 0, 0, 0, 0, 0, 16, 0, 5, 4, 0, 0, 6, 0, 0, 0, 0, 0, 0, 9, 0, 0, 14, 4, 0, 0, 3, 5, 0, 9, 0, 0, 0, 15, 0, 0, 0, 14, 3â¦
## $ Kids <int> 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 3, 4, 1, 0, 2, 0, 3, 2, 2, 1, 0, 5, 0, 2, 0, 0, 0, 1, 0, 2, 2, 1, 0, 5, 0, 0, 3, 0, 0, 1, 0,â¦
## $ Mil <fct> Urban, Rural, Rural, Rural, Rural, Rural, Rural, Rural, Rural, Rural, Rural, Rural, Urban, Urban, Rural, Urban, Urban, Rurâ¦
## $ DSize <dbl+lbl> 2, 1, 4, 4, 1, 1, 3, 2, 3, 4, 1, 6, 3, 2, 5, 3, 4, 2, 4, 2, 3, 6, 3, 2, 4, 4, 3, 2, 2, 4, 1, 4, 6, 4, 5, 5, 4, 2, 5, 3â¦
## $ taille <dbl> 2, 5, 5, 7, 5, 1, 2, 1, 4, 1, 5, 9, 5, 1, 5, 3, 7, 4, 6, 7, 2, 14, 3, 4, 1, 2, 9, 3, 3, 12, 4, 7, 2, 9, 2, 6, 5, 2, 4, 3, â¦
## $ EP <fct> 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1,â¦
##
## The variable Prov indicates the 72 provinces of the country, it looks like this:
## > levels(df$Prov)
## [1] "Agadir-Ida-Ou-Tanane" "Al Haouz" "Al Hoceïma" "Meknès" "Azilal"
## [6] "Béni Mellal" "Benslimane" "Berkane" "Berrechid" "Boujdour"
## [11] "Boulemane" "Casablanca" "Chefchaouen" "Chichaoua" "Chtouka-Ait Baha"
## [16] "Driouch" "El Hajeb" "El Jadida" "El Kelâa des Sraghna" "Errachidia"
## [21] "Essaouira" "Fahs-Anjra" "Fès" "Figuig" "Fquih Ben Salah"
## [26] "Guelmim" "Guercif" "Ifrane" "Inezgane-Ait Melloul" "Jerada"
## [31] "Kénitra" "Khémisset" "Khénifra" "Khouribga" "Laâyoune"
## [36] "Larache" "Marrakech" "Médiouna" "Midelt" "Mohammadia"
## [41] "Nador" "Nouaceur" "Ouarzazate" "Ouezzane" "Oujda-Angad"
## [46] "Rabat" "Rehamna" "Safi" "Salé" "Sefrou"
## [51] "Settat" "Sidi Bennour" "Sidi Ifni" "Sidi Kacem" "Sidi Slimane"
## [56] "Skhirate-Témara" "Tanger-Assilah" "Taounate" "Taourirt" "Taroudannt"
## [61] "Tata" "Taza" "Tétouan" "M'Diq-Fnideq" "Tinghir"
## [66] "Tiznit" "Youssoufia" "Zagora" "Moulay Yacoub" "Tan-Tan / Assa-Zag"
## [71] "Es-Semara / Tarfaya" "Oued Ed Dahab / Aousserd"
##
## and I have another shapefile containing the location polygons for each province, it looks like this:
## > map_3
## Simple feature collection with 72 features and 1 field
## Geometry type: POLYGON
## Dimension: XY
## Bounding box: xmin: -17.10496 ymin: 20.7715 xmax: -0.9987581 ymax: 35.92243
## Geodetic CRS: WGS 84
## # A tibble: 72 × 2
## PROV geometry
## * <chr> <POLYGON [°]>
## 1 Agadir-Ida-Ou-Tanane ((-9.504189 30.3498, -9.503555 30.34989, -9.503493 30.34991, -9.502746 30.35016, -9.50251...
## 2 Al Haouz ((-7.344661 31.67321, -7.344881 31.67344, -7.345 31.6736, -7.346125 31.67503, -7.346162 3...
## 3 Al Hoceïma ((-3.926041 35.26096, -3.926289 35.26113, -3.926532 35.26142, -3.926776 35.26171, -3.9270...
## 4 Azilal ((-5.908287 32.30598, -5.908415 32.3063, -5.913961 32.32005, -5.915668 32.32325, -5.92306...
## 5 Benslimane ((-7.195987 33.18768, -7.191716 33.18865, -7.169873 33.19075, -7.164904 33.19066, -7.1557...
## 6 Berkane ((-2.313083 34.58991, -2.312776 34.58996, -2.312381 34.59001, -2.308861 34.59052, -2.3085...
## 7 Berrechid ((-7.250729 33.21856, -7.249378 33.22028, -7.246216 33.22429, -7.245882 33.22472, -7.2454...
## 8 Boujdour ((-12.54495 24.46466, -12.49897 24.47041, -12.4321 24.48361, -12.38604 24.49452, -12.3777...
## 9 Boulemane ((-4.202123 32.58081, -4.200447 32.58095, -4.183846 32.58234, -4.17291 32.58488, -4.15931...
## 10 Casablanca ((-7.669198 33.49706, -7.668895 33.49699, -7.66875 33.49705, -7.668693 33.49707, -7.66854...
## # ℹ 62 more rows
## # ℹ Use `print(n = ...)` to see more rows
##
## When I want to run a simple logistic regression I simply run this command and it gives me the results I want:
## > model <- glm(EP~Age+Edu+Kids+Mil+DSize+taille,family = binomial(link = "logit"), data = df)
## > summary(model)
##
## Call:
## glm(formula = EP ~ Age + Edu + Kids + Mil + DSize + taille, family = binomial(link = "logit"),
## data = df)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.0040682 0.0165088 -0.246 0.805
## Age -0.0060011 0.0002926 -20.511 <2e-16 ***
## Edu -0.1165767 0.0010738 -108.562 <2e-16 ***
## Kids 0.0927567 0.0038183 24.293 <2e-16 ***
## MilRural 1.5594703 0.0078742 198.048 <2e-16 ***
## DSize -0.4697581 0.0034187 -137.409 <2e-16 ***
## taille -0.1368148 0.0026621 -51.393 <2e-16 ***
## ---
## Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 613335 on 719910 degrees of freedom
## Residual deviance: 497842 on 719904 degrees of freedom
## (10188 observations deleted due to missingness)
## AIC: 497856
##
## Number of Fisher Scoring iterations: 6
##
## Now I want to run a Geographically Weighted Logistic Regression, and for that I checked the GWModel package manual, and found the function ggwr.basic, which has an option for logistic regression.
## DM<-gw.dist(dp.locat=coordinates(londonhp))
## bw.f2 <- bw.ggwr(BATH2~FLOORSZ,data=londonhp, dMat=DM,family ="binomial")
## res.binomial<-ggwr.basic(BATH2~FLOORSZ, bw=bw.f2,data=londonhp, dMat=DM,
## family ="binomial")
##
## But in their example, the location variable is a point (X Y coordinates), whereas in my case the locations are polygons, also they have one observation per location (they use the LondonHP data set), whereas I have more than 730,000 observations scattered across 72 provinces.
## Therefore, how can I run a Geographically Weighted Logistic Regression model on my data? I just gave the GWModel package as an example; if you know a different package or function that can get the job done, please feel free to use it. |0 |4 |14 |NA |NA |
## | 76396425|12 |0 |r,text,ncdf4 |2023-06-03 14:03:28Z |AS1 | 12369360|1 |I'm trying to convert NC files (SPEI index) to text in R using a package I found online. I managed to extract the NC files but am unable to convert them into text. I get the following error: Error in d[[s]] : subscript out of bounds
## I am attaching both codes below. The first one to compute the SPEI from cru files resulted in 4 output files: spei01.nc, spei03.nc, spei06.nc, and spei12.nc.
## if (!require('pacman')) install.packages('pacman')
## pacman::p_load(SPEI, ncdf4, snowfall, parallel, Hmisc)
## source('./R/functions.R')
##
## # Init a parallel computing cluster; modify the parameter `cpus` to the
## # desired number of cores; otherwise, use all available cores
## #sfInit(parallel=TRUE, cpus=detectCores())
## sfInit(parallel=TRUE, cpus=4)
## sfExport(list='spei', namespace='SPEI')
##
## # Compute SPEI at all time scales between 1 and 48 and store to disk
## #for (i in c(1:48)) {
## for (i in c(1,3,6,12)) {
## spei.nc(
## sca=i,
## inPre='./inputData/cru_ts4.05.1901.2020.pre.dat.nc',
## inEtp='./inputData/cru_ts4.05.1901.2020.pet.dat.nc',
## outFile=paste('./outputNcdf/spei',
## formatC(i, width=2, format='d', flag='0'),'.nc',sep=''),
## title=paste('Global ',i,'-month',
## ifelse(i==1,'','s'),' SPEI, z-values, 0.5 degree',sep=''),
## comment='Using CRU TS 4.05 precipitation and potential evapotranspiration data',
## block=36,
## inMask=NA,
## tlapse=NA
## )
## gc()
## }
##
## # Stop the parallel cluster
## sfStop()
## type here
##
## Here is the second one to convert the NC files to text:
## library(ncdf4)
##
## #ff <- list.files('..','SPEIpen',full.names=TRUE)
## #system(paste('gzip -d',ff))
##
## # open the netCDF files to read
## ff <- list.files('./outputNcdf','*.nc',full.names=TRUE)
## d <- list()
## for (i in 1:length(ff)) d[[i]] <- nc_open(ff[i])
##
## # a matrix of coordinates for which we want to extract the data
## co <- matrix(NA,ncol=2,nrow=720*360)
## colnames(co) <- c('lon','lat')
## co[,1] <- rep(d[[1]]$dim$lon$vals,360)
## co[,2] <- sort(rep(d[[1]]$dim$lat$vals,720),decreasing=TRUE)
## co <- co[83260:259200,]
##
## toText <- function(co, d) {
## require(ncdf4)
## lons <- d[[1]]$dim$lon$vals
## lats <- d[[1]]$dim$lat$vals
## tims <- d[[1]]$dim$time$vals
## lo <- co[1]
## la <- co[2]
## if (file.exists(paste('./outputTxt/spei_',lo,'_',la,'.csv.gz',sep='')))
## return()
## x <- which(lons==lo)
## y <- which(lats==la)
## z <- ncvar_get(d[[1]],'spei',c(x,y,1),c(1,1,1))
## t <- length(tims)
## if (is.na(z)) {
## rm(lons,lats,lo,la,x,y,z)
## return()
## }
## a <- matrix(NA,ncol=48,nrow=t)
## scales <- formatC(1:48, width=2, format='d', flag='0')
## colnames(a) <- paste('SPEI',scales,sep='')
## for (s in 1:48) {
## a[,s] <- ncvar_get(d[[s]],'spei',c(x,y,1),c(1,1,-1))
## }
## b <- 'Description: Standardized Precipitation Evapotranspiration Index (SPEI) at timescales between 1 and 48 months. Each column is a time series of monthly SPEI values starting on January 1901. Units: standard (Z) scores, i.e. normally distributed values with mean 0 and unit standard deviation. Missing value: NA. Not a number: nan. Version: 2.5 - July 2017. Comment: Using Penman equation for ET. Creators: Santiago Beguería - santiago.begueria_add_the_at_symbol_csic.es and Sergio Vicente-Serrano - svicen_add_the_at_symbol_ipe.csic.es.'
## c <- paste('./outputTxt/spei_',lo,'_',la,'.csv',sep='')
## write.table(b,c,row.names=FALSE,col.names=FALSE)
## write.table(round(a,5),c,append=TRUE,row.names=FALSE,col.names=TRUE,sep=',',quote=FALSE)
## system(paste('gzip',c))
## #return(print(paste(lo,la,sep=', ')))
## rm(lons,lats,lo,la,x,y,z,a,b,c)
## gc()
## return()
## }
## # toText(co[20000,],d)
##
## apply(co, MARGIN=1, FUN=toText, d)
##
## # Parallel: doesn't work due to multiple access to same files
## #library(snowfall)
## #sfInit(parallel=TRUE,cpus=10)
## #sfApply(co,margin=1,fun=toText,d)
## #sfStop()
##
## #system(paste('gzip',ff))
##
##
## Any help in figuring out what went wrong would be greatly appreciated. I have never really dealt with NC files and I'm relatively new to R.
## I tried both codes but the second one to convert NC to text resulted in an error: Error in d[[s]] : subscript out of bounds, which suggests an issue with the matrix. |0 |0 |3 |NA |NA |
## | 76396351|16 |0 |r,raster,sentinel,satellite |2023-06-03 13:40:26Z |Johnny | 22013540|1 |I ama trying to merge two RastertStack satellite data located just about over each other. I want to have them both in a single plot and I already set their extent to ESPG:3857.
## I use the code:
## MergedMap<-merge(StackedBands1,StackedBands3,tolerance=0.05, filename="MergedMap",overlap=FALSE,ext=NULL,overwrite=TRUE)
##
## StackBands1 - 4 layers of different bands from Sentinel from location one
## StackBands2 - 4 Layers of different bands from Sentinel from location two
## These two location are just beside each other and I want to have them on a single plot. After running the code they are turned into a rasterbrick and errors shows up while plotting them. There must be something wrong with my code so if someone can help. Thank you :)
## MergedMap<-merge(StackedBands1,StackedBands3,tolerance=0.05,filename="MergedMap",overlap=FALSE,ext=NULL,overwrite=TRUE)
## There were 50 or more warnings (use warnings() to see the first 50)
##
## > plotRGB(MergedMap, r=4,g=3,b=2, axes = TRUE, stretch = "lin", main = "False Color Composite")
## Error in if (x@file@nodatavalue < 0) { :
## missing value where TRUE/FALSE needed
##
## > warning(MergedMap)
## Warning message:
## brick(ncol=10980, nrow=10980, nl=4, xmn=0, xmx=10980, ymn=0, ymx=10980, crs='+proj=merc +a=6378137 +b=6378137 +lat_ts=0 +lon_0=0 +x_0=0 +y_0=0 +k=1 +units=m +nadgrids=@null +wktext +no_defs')
##
## I thought maybe there's no other way to stitch or merge two rasterstack layers and just have to do it on QGIS. But I want to find a way to have them. Do I have to edit the extent of these objects? |0 |0 |0 |Phil |2023-06-03 21:08:32Z |
## | 76396349|25 |0 |r,subset,correlation,pairwise |2023-06-03 13:39:53Z |Tig | 22013069|1 |I have a dataset (con) divided in 2 groups (A and B) and for each sample from group A (1-5), group B has a correspondent sample (1-5), each sample has different measures (zinc, iron, cadmium) pertaining to it.
## My intended goal is making a correlation test between A1-B1;A2-B2;A3-B3,...measures
## My question is, is there a way to make such a individual correlation and then trace a linear model graph or if i can just separate the groups and do a cor.tes() between the two subsets
## I have created two different subsets (conA, conB), using
## cor.tes(conA$Zinc,conB$Zinc, method="spearman",exact=FALSE) to get the overall correlation for each measure
## but I wanted to make each pair wise correlation which i dont think its dont its been done this way. |0 |0 |0 |Progman |2023-06-03 13:40:30Z |
## | 76396323|14 |0 |r,gwmodel,spgwr |2023-06-03 13:30:49Z |gagrochowski | 22009967|1 |I´m trying to run a GWR code with spgwr in R and I´m having a lot of hardships. I´m trying to run a Hedonic Model into GWR, using a point shapefile that I´ve prepared from a database of real estate transactions in my city, but I can´t run the model in any given form. My hedonic model, that is based on a multiple regression model, is extrapolated to GWR, as it is considered as the global regression. It uses numeric and categoric variables, and is used in the code below.
## ## Script - GWR ##
##
## # Step 1: Install Packages
## # ---------------------------
## if (!require("sp")) install.packages("sp")
## library(sp)
## if (!require("tmap")) install.packages("tmap")
## library(tmap)
## if (!require("spdep")) install.packages("spdep")
## library(spdep)
## if (!require("spData")) install.packages("spData")
## library(spData)
## if (!require("sf")) install.packages("sf")
## library(sf)
## if (!require("classInt")) install.packages("classInt")
## library(classInt)
## if (!require("maptools")) install.packages("maptools")
## library(maptools)
## if (!require("GISTools")) install.packages("GISTools")
## library(GISTools)
## if (!require("spgwr")) install.packages("spgwr")
## library(spgwr)
## if (!require("GWmodel")) install.packages("GWmodel")
## library(GWmodel)
## if(!require(rgdal)){install.packages('rgdal')}
## library(rgdal)
## if(!require(terra)){install.packages('terra')}
## library(terra)
##
## # Step 2: Read Shape
## # ---------------------------
## map <- readOGR("E:/SHP/2010/ITBI_TRANS-2010_0.shp")
## head(map@data)
##
## # Step 3: Definition of the Influencing Variables
## # ---------------------------
## bwG_1 <- gwr.sel(map$X._M2_CORR ~ map$DIST_BRT + map$ANO_CONST + map$AREA_CONST + map$P_ACAB + map$TP_CONST + map$I_CENT, data = map, gweight = gwr.Gauss, verbose = FALSE)
##
## gwrG_1 <- gwr(map$X._M2_CORR ~ map$DIST_BRT + map$ANO_CONST + map$AREA_CONST + map$P_ACAB + map$TP_CONST + map$I_CENT, data = map, bandwidth = bwG_1, gweight = gwr.Gauss)
##
## Gaussian.summary_1 <- summary(gwrG_1$SDF)
##
## Gaussian.stats_1 <- gwr.basic(map$X._M2_CORR ~ map$DIST_BRT + map$ANO_CONST + map$AREA_CONST + map$P_ACAB + map$TP_CONST + map$I_CENT, data = map, bw = bwG_1, kernel = "gaussian")
##
## spplot(gwrG_1$SDF, "localR2", col = "black", main = "teste")
##
## ## Save Map ein .shp
## writeOGR(gwrG_1$SDF, dsn = "~/Desktop", overwrite_layer = TRUE, layer = "gwr.shp", driver = "ESRI Shapefile")
##
## The error from this try is described below:
## > Error in gwr(map$X._M2_CORR ~ map$DIST_BRT + map$ANO_CONST + map$AREA_CONST + :
## new data matrix rows mismatch
## > warnings()
## Mensagens de aviso:
## 1: In w.i * weights :
## longer object length is not a multiple of shorter object length
## 2: In w.i * weights :
## longer object length is not a multiple of shorter object length
## 3: In w.i * weights :
## longer object length is not a multiple of shorter object length
## 4: In w.i * weights :
## longer object length is not a multiple of shorter object length
## 5: In w.i * weights :
## longer object length is not a multiple of shorter object length
## 6: In w.i * weights :
## longer object length is not a multiple of shorter object length
## 7: In w.i * weights :
## longer object length is not a multiple of shorter object length
## 8: In w.i * weights :
## longer object length is not a multiple of shorter object length
## 9: In w.i * weights :
## longer object length is not a multiple of shorter object length
## 10: In w.i * weights :
## longer object length is not a multiple of shorter object length
## 11: In w.i * weights :
## longer object length is not a multiple of shorter object length
## 12: In w.i * weights :
## longer object length is not a multiple of shorter object length
## 13: In w.i * weights :
## longer object length is not a multiple of shorter object length
## 14: In w.i * weights :
## longer object length is not a multiple of shorter object length
## 15: In w.i * weights :
## longer object length is not a multiple of shorter object length
## 16: In w.i * weights :
## longer object length is not a multiple of shorter object length
## 17: In w.i * weights :
## longer object length is not a multiple of shorter object length
## 18: In w.i * weights :
## longer object length is not a multiple of shorter object length
## 19: In w.i * weights :
## longer object length is not a multiple of shorter object length
## 20: In w.i * weights :
## longer object length is not a multiple of shorter object length
## 21: In w.i * weights :
## longer object length is not a multiple of shorter object length
## 22: In w.i * weights :
## longer object length is not a multiple of shorter object length
## 23: In w.i * weights :
## longer object length is not a multiple of shorter object length
## 24: In w.i * weights :
## longer object length is not a multiple of shorter object length
## 25: In w.i * weights :
## longer object length is not a multiple of shorter object length
## 26: In w.i * weights :
## longer object length is not a multiple of shorter object length
## 27: In w.i * weights :
## longer object length is not a multiple of shorter object length
## 28: In w.i * weights :
## longer object length is not a multiple of shorter object length
## 29: In w.i * weights :
## longer object length is not a multiple of shorter object length
## 30: In w.i * weights :
## longer object length is not a multiple of shorter object length
## 31: In w.i * weights :
## longer object length is not a multiple of shorter object length
## 32: In w.i * weights :
## longer object length is not a multiple of shorter object length
## 33: In w.i * weights :
## longer object length is not a multiple of shorter object length
## 34: In w.i * weights :
## longer object length is not a multiple of shorter object length
## 35: In w.i * weights :
## longer object length is not a multiple of shorter object length
## 36: In w.i * weights :
## longer object length is not a multiple of shorter object length
## 37: In w.i * weights :
## longer object length is not a multiple of shorter object length
## 38: In w.i * weights :
## longer object length is not a multiple of shorter object length
## 39: In w.i * weights :
## longer object length is not a multiple of shorter object length
## 40: In w.i * weights :
## longer object length is not a multiple of shorter object length
## 41: In w.i * weights :
## longer object length is not a multiple of shorter object length
## 42: In w.i * weights :
## longer object length is not a multiple of shorter object length
## 43: In w.i * weights :
## longer object length is not a multiple of shorter object length
## 44: In w.i * weights :
## longer object length is not a multiple of shorter object length
## 45: In w.i * weights :
## longer object length is not a multiple of shorter object length
## 46: In w.i * weights :
## longer object length is not a multiple of shorter object length
## 47: In w.i * weights :
## longer object length is not a multiple of shorter object length
## 48: In w.i * weights :
## longer object length is not a multiple of shorter object length
## 49: In w.i * weights :
## longer object length is not a multiple of shorter object length
## 50: In w.i * weights :
## longer object length is not a multiple of shorter object length
##
## It appears to be an error with bandwidth matrix, that have one value only. I really don´t know what to do more. Search into the forum and also asked chat GPT, but I don´t have a clue of what is going. I would be greatful with any help. |0 |0 |0 |gagrochowski |2023-06-03 14:19:04Z |
## | 76396291|8 |0 |r,plot,tree |2023-06-03 13:20:46Z |Junjie HE | 22013459|1 |I am doing conditional inference tree analysis using the partykit package in R. I want to plot any tree that is extracted from the forest grown by cforest(). But I got an error message when I am trying with the plot function. The following is a chunk of codes that may produce the like error message with the iris data. Answers are appreciated. Thank you!
## Code:
## library(partykit)
## cf <- cforest(Species ~., data=iris)
## tr <- gettree(cf, tree=1)
## plot(tr)
## Errors in console:
## Error in Summary.factor(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, :
## ‘min’ not meaningful for factors
## I am expecting plots for individual trees in the cforest() result. |0 |0 |0 |NA |NA |
## | 76396233|11 |0 |r,tidyverse,cluster-computing,purrr |2023-06-03 13:03:43Z |Ranji Raj | 14512983|748 |I am working on the following code to produce some clusters
## #Grouping pupils as per their performance at two intermediate examinations----
## library(tidyverse)
## library(purrr)
## library(cluster)
## student <- read_csv("clustering-student-mat.csv") #In workspace
## #student
## K <- 2:8
## list_clu_res <- vector("list", length(K))
##
## list_clu_res <- map(K, ~kmeans(student %>% select(Exam1, Exam2), centers = .))
## for(i in seq_along(K)) {
## student_clu <- student %>%
## bind_cols(tibble(cluster = list_clu_res[[i]]$cluster)) %>%
## mutate(cluster = factor(cluster))
## # Filter points that lie on a cluster's convex hull
## student_hull <- student_clu %>%
## split(.$cluster) %>%
## map(~ slice(., chull(.$Exam1, .$Exam2))) %>%
## do.call("rbind", .)
## print(ggplot(student_clu, aes(Exam1, Exam2, color = cluster, fill = cluster)) +
## geom_polygon(data = student_hull, alpha = .5, color = "black") +
## geom_point(pch = 21) +
## geom_point(data = student_clu %>%
## group_by(cluster) %>%
## summarize_all(mean), shape = "+", color = "black",
## size = 8) +
## guides(fill = FALSE, color = FALSE) +
## labs(title = str_c("k=", K[i])))
## }
##
## But when I invoked the map() it gives the following error:
## Error in the.map$names : $ operator is invalid for atomic vectors
##
## The code worked few days back but now when I am running again it gives this error.
## Data here
## Any help is appreciated to fix the code. |0 |4 |17 |NA |NA |
## | 76396165|23 |0 |r,geolocation,geospatial,sf,rgdal |2023-06-03 12:47:51Z |Usman YousafZai | 1535580|1,018 |How to create a gridline of 7*7 sqkm using Latitude and Longitude values. These values should be the centroid value of a single square in the grid. I am not sure if I am doing it in the right way. I tried st_make_grid from sf (Simple Features) library but that shows me an empty plot.
## MyGrid <- st_make_grid(DF, cellsize = c(0.07, 0.07), square = TRUE, crs = 4326)
##
## Below is my example DF
## DF <- structure(list(lat = c(43.25724, 43.25724, 43.25724, 43.25616,
## 43.25616, 43.25616), lon = c(-96.01955, -95.98172, -95.92336,
## -96.40973, -96.25733, -96.17735)), class = "data.frame", row.names = c(NA,
## 6L))
##
## ## > DF
## ## lat lon
## ## 1 43.25724 -96.01955
## ## 2 43.25724 -95.98172
## ## 3 43.25724 -95.92336
## ## 4 43.25616 -96.40973
## ## 5 43.25616 -96.25733
## ## 6 43.25616 -96.17735
##
## Thanks |4 |17 |43 |I_O |2023-06-03 13:32:47Z |
## | 76395825|37 |0 |r,class,matrix,numeric |2023-06-03 11:22:41Z |s28 | 21573410|29 |I have several matrices and I would like to apply something like
## class(matrix) <- "numeric" to all of them at once, i.e. the class of all matrices should be changed to numeric.
## Do you know how to do this?
## dput(matrix[1:3,]) results in structure(c(285.789361223578, 282.564165145159, 273.633228540421, 256.789452806115, 260.808130130172, 241.718192100525, 266.765343174338, 267.881099879742, 250.710165724158, 284.365977942944, 281.670583188534, 268.735618144274, 264.118778035045, 262.856532484293, 254.31867428124, 286.250801086426, 284.585711210966, 268.984649181366, 286.17267370224, 284.429456442595, 267.478255555034, 275.10055847466, 274.141056537628, 259.477523118258, 246.454664766788, 252.470473349094, 232.699362188578, 284.998321458697, 283.73363442719, 269.555955678225, 0, 0, 0), dim = c(3L, 11L), dimnames = list(NULL, c("", "", "", "", "", "", "", "", "", "", "vec"))) |0 |0 |5 |s28 |2023-06-03 11:32:37Z |
## | 76395807|8 |0 |r,spatial,kriging,gstat |2023-06-03 11:18:47Z |Candy | 4849505|1 |I am trying to use universal kriging on some irregular data. I keep getting an error:
## Error in chol.default(A) :
## the leading minor of order 2 is not positive definite
## When I run the krigST command.
## Here is a sample of my data:
##
## long
## lat
## date_from
## date_to
## var1
## var2
## var3
## var4
## resp
## tid
## spid
## 144.12
## -33.73
## 2019-01-03
## 2019-02-02
## 700
## 103
## 48.6
## 4.93
## -4.04
## 1
## 1
## 144.46
## -32.59
## 2019-01-03
## 2019-02-02
## 698
## 89
## 65.8
## 16.69
## -2.93
## 1
## 2
## 144.98
## -31.05
## 2019-01-03
## 2019-02-02
## 1947
## 145
## 145.4
## 8.22
## -7.05
## 1
## 3
## 144.46
## -32.59
## 2019-01-07
## 2019-02-06
## 686
## 95
## 61.4
## 7.03
## -3.94
## 2
## 2
## 145.44
## -33.25
## 2019-01-07
## 2019-02-06
## 637
## 151
## 25
## 7.76
## -5.09
## 2
## 4
## 146.15
## -34.01
## 2019-01-07
## 2019-02-06
## 849
## 75
## 11
## 9.86
## -5.46
## 2
## 6
## 145.8
## -33.45
## 2019-02-03
## 2019-03-02
## 704
## 90
## 14.2
## 10.91
## -5.28
## 3
## 5
## 146.15
## -34.01
## 2019-02-03
## 2019-03-02
## 15
## 1
## 12.9
## 8.4
## -3.41
## 3
## 6
## 144.98
## -31.05
## 2019-03-13
## 2019-04-12
## 975
## 153
## 87.3
## 13.09
## -6.1
## 4
## 3
## 145.44
## -33.25
## 2019-03-13
## 2019-04-12
## 1025
## 124
## 33.4
## 22.13
## -6.89
## 4
## 4
## 144.46
## -32.59
## 2019-04-22
## 2019-05-21
## 264
## 192
## 20
## 5.52
## -4.28
## 5
## 2
## 145.44
## -33.25
## 2019-04-22
## 2019-05-21
## 530
## 136
## 17.2
## 9.27
## -5.48
## 5
## 4
## 145.8
## -33.45
## 2019-04-22
## 2019-05-21
## 790
## 87
## 16.6
## 15.63
## -6.57
## 5
## 5
## 145.8
## -33.45
## 2019-05-03
## 2019-06-02
## 1074
## 67
## 19.7
## 9.35
## -6.94
## 6
## 5
## 146.15
## -34.01
## 2019-05-03
## 2019-06-02
## 140
## 7
## 49.1
## 13.98
## -3.12
## 6
## 6
## 144.98
## -31.05
## 2019-06-02
## 2019-07-01
## 1104
## 126
## 21.2
## 17.88
## -6.96
## 7
## 3
## 144.46
## -32.59
## 2019-06-03
## 2019-07-02
## 351
## 174
## 61
## 7.45
## -5.14
## 8
## 2
## 145.44
## -33.25
## 2019-06-03
## 2019-07-02
## 573
## 122
## 1.4
## 13.98
## -6.42
## 8
## 4
## 146.15
## -34.01
## 2019-06-03
## 2019-07-02
## 651
## 53
## 12.4
## 9.06
## -6.21
## 8
## 6
## 145.8
## -33.45
## 2019-07-15
## 2019-08-14
## 1058
## 75
## 11.8
## 12.73
## -6.77
## 9
## 5
##
## This is the code I'm using:
## dat<- read.csv("mydat.csv")
##
##
## #load libraries
## library(gstat)
## library(sp)
## library(spacetime)
##
## #Format lat and long & remove the duplicates
## COOR=unique(cbind(dat$long,dat$lat))
##
## #create SpatialPoints object from coordinates CRS stands for Coordinate reference system
## COOR.formatted=SpatialPoints(COOR,proj4string=CRS("+proj=longlat +datum=WGS84"))
##
## #create index for STSDF object
## (index <- matrix(cbind(dat$spid, dat$tid), nrow= length(dat$spid), ncol=2, byrow = FALSE ))
##
## dataframe<-data.frame(var1 = dat$var1,var2=dat$var2,
## var3 = dat$var3,var4 = dat$var4, resp = dat$resp,
## lat=dat$lat, lat.sq=dat$lat^2, ID=paste("ID",1:nrow(dat)))
## DATE.formatted <- unique(as.Date(dat$date_from))
## DATE.formatted.end <- as.POSIXct(unique(as.Date(dat$date_to)))
## datST = STSDF(COOR.formatted, DATE.formatted, dataframe,
## index, endTime=DATE.formatted.end )
##
##
## summary(datST)
## #################################create some theoretical models###################
## var.uk.o=variogramST(resp~ var1+var2+var3+var4,
## datST,tlags=0:7,
## na.omit=TRUE,)
## plot(var.uk.o)
## type <-c("Exp", "Sph")
## psill=2
## spatialVgm = vgm(psill=psill,type[1], range=120, nugget=0)
## temporalVgm=vgm(psill=psill,type[1], range=150, nugget=0)
## jointVgm = vgm(psill=psill,type[2], range=120, nugget=0)
## fit.ssum.o = vgmST("sumMetric", space = spatialVgm,
## time = temporalVgm,
## joint = jointVgm,
## nugget = 0,
## stAni = 25)
## fit.ssum.o=fit.StVariogram(var.uk.o,
## fit.ssum.o ,fit.method = 8)
## #this is where it goes wrong.
## pred.a <- krigeST(resp~1,
## data=datST, newdata=datST,
## modelList=fit.ssum.o)
## pred.b <- krigeST(resp~var1+var2+var3+var4,
## data=datST, newdata=datST,
## modelList=fit.ssum.o)
## #I tried increasing the value of the response variable but I still get the error.
## dat$resp= dat$resp+10000
##
##
##
## I tried to increase the response variable to move it away from zero, because my data is very noisy, but it doesn't work (I've tried small increments 10, 20, 50....1000, 10000 nothing works. I added the optional endTime option in the STSDF object but that didn't work either. |0 |0 |3 |NA |NA |
## | 76395682|47 |0 |r,dataframe,join,merge |2023-06-03 10:42:15Z |Usman YousafZai | 1535580|1,018 |I have two data frames with Latitude , Longitude and speed values. DF1 has three values after the decimal point and DF2 has five values after the decimal point respectively. I want to merge DF1 and DF2 by Latitude and Longitude but by first 4 values. If the first four values of Lat and Long are matching then merge the rest of column in resultant DF. But I also want to keep the values after three decimal point that are existing in DF2. If the Lat and Long are not matching their first four values then ill ignore it. Below is the example for more clarification.
## Thanks
## DF1
##
## A_Lat
## A_Long
## A_Speed
## 5.667
## 2.443
## 1345
##
## DF1 <- structure(list(A_Lat = c(5.667, 5.667, 5.667, 5.667, 5.663, 5.662
## ), A_Long = c(2.443, 2.445, 2.445, 2.445, 2.445, 2.445), A_Speed = c(1345L,
## 3551L, 3551L, 3551L, 3551L, 3551L)), class = "data.frame", row.names = c(NA,
## 6L))
##
## DF2
##
## B_Lat
## B_Long
## B_Speed
## 5.66781
## 2.44323
## 2345
##
## DF2 <- structure(list(B_Lat = c(5.66781, 5.66783, 5.66743, 5.66723,
## 5.66734, 5.66712), B_Long = c(2.44323, 2.44534, 2.44523, 2.44512,
## 2.44512, 2.44543), B_Speed = c(2345L, 4551L, 5551L, 7551L, 7551L,
## 8551L)), class = "data.frame", row.names = c(NA, 6L))
##
## Resultant DF
##
## R_Lat
## R_Long
## R_B_Speed
## R_A_Speed
## 5.66781
## 2.44323
## 2345
## 1345 |4 |17 |43 |I_O |2023-06-03 11:15:44Z, 2023-06-03 13:24:05Z |
## | 76395579|11 |0 |r,shiny,publish,flexdashboard |2023-06-03 10:14:15Z |Enrique Villa Coronado | 20848352|1 |I want to publish dashboard (flexdashboard) in shinyapp.io (R) but i constantly get the error:
## "Error detecting locale: Error in make.names(col.names, unique = TRUE): invalid multibyte string 3
## (Using default: en_US)"
## I researched and found nothing weird (non-ASCII) characters in my CSV.
## My code runs well in local but when i try to publish it, never works.
## I believe this is the critical part of the code:
## ---
## title: "Airbus Defence & Space"
## output:
## flexdashboard::flex_dashboard:
## orientation: columns
## social: ["linkedin"]
## vertical_layout: fill
## source_code: embed
## theme:
## version: 4
## primary: "#0000FF"
## navbar-bg: "#3ADAC6"
## base_font:
## google: Prompt
## heading_font:
## google: Sen
## code_font:
## google:
## # arguments to sass::font_google()
## family: JetBrains Mono
## local: false
## runtime: shiny
## ---
##
## '''{r setup, include=FALSE}
## (a bunch of libraries)
## # rsconnect::deploy app
## library(rsconnect)
## library(MASS)
## rsconnect::deployApp("C:/Users/kiker/Documents/RDashBoards/Airbus")
## rsconnect::setAccountInfo(name='<username>',
## token='<token>',
## secret='<secret>')
## (in my code i have my username, token and secret well written)
##
## df <- read.csv('C:/Users/kiker/Documents/RDashBoards/airbus_raw_data.csv', encoding = "UTF-8")
## #df <- load("df.RData")
## data(df)
## save(df, file = "df.RData")
## #Init Variables
## ord_df <- df %>% select(-matches("Del|Opr"))
## names(ord_df) <- gsub("^Ord_", "", names(ord_df))
## del_df <- df %>% select(-matches("Ord|Opr"))
## names(del_df) <- gsub("^Del_", "", names(del_df))
## opr_df <- df %>% select(-matches("Ord|Del"))
## names(opr_df) <- gsub("^Opr_", "", names(opr_df))
## aircraft <- ord_df %>% select(-matches("Reg|Cou|All"))
## aircraft <- names(aircraft)
## country <- sort(as.character(df$Country))
## '''
##
## I tried to set apart the .rmd file alone in a new folder as well, but really found no real good answer yet. Would appreciate a lot any help :)
## This is how it looks now:
## Folder
## I am expecing to see the dashboard published correctly in shinyapp.io, as now I can see the status is "running" but shows in the logger:
## This is the whole Deploy log in R:
## Preparing to deploy document...DONE
## Uploading bundle for document: 9208413...Detecting system locale ... DONE
## Deploying bundle: 7299931 for document: 9208413 ...
## Waiting for task: 1306343830
## building: Building image: 8667442
## building: Installing system dependencies
## building: Fetching packages
## building: Installing packages
## building: Installing files
## building: Pushing image: 8667442
## deploying: Starting instances
## rollforward: Activating new instances
## unstaging: Stopping old instances
## Document successfully deployed to https://enriquevillacoronado.shinyapps.io/Airbus_FlexDashBoard_Shiny/
## Deployment completed: https://enriquevillacoronado.shinyapps.io/Airbus_FlexDashBoard_Shiny/
## Warning message:
## Error detecting locale: Error in make.names(col.names, unique = TRUE): invalid multibyte string 3
## (Using default: en_US)
##
## This is the .csv file:
## CSV File |0 |0 |0 |NA |NA |
## | 76395522|38 |0 |r,data.table,reshape |2023-06-03 09:59:52Z |SUMIT | 15021889|563 |Now, I have the same input data frame:
## AAA <- c('APIS', 'APIS', 'PIPIS', 'AGAROS', 'AGAROS', 'AGAROS', 'NOTHING')
## BBB <- c('a', 'a', 'a', 'b', 'b', 'c', NA)
## CCC <- c(1, 2, NA, 4, 5, 6, 7)
## DDD <- c("Mat", "ASG", "MNT", "NBEH", "DJ", "EU", "DHR")
## test.data <- data.frame(AAA, BBB, CCC, DDD)
## test.data
##
## I want to reshape it so that each unique "AAA" variable become single row. Multiple entry of the "AAA" variable automatically become new column with suffix or prefix 1,2,3..... Sort of like this:
## AAA <- c('APIS', 'PIPIS', 'AGAROS', 'NOTHING')
## BBB_1 <- c('a', 'a', 'b', NA)
## CCC_1 <- c(1, NA, 4, 7)
## DDD_1 <- c("Mat", "MNT", "NBEH", "DHR")
##
## BBB_2 <- c('a', NA, 'b', NA)
## CCC_2 <- c(2, NA, 5, NA)
## DDD_2 <- c("ASG", NA, "DJ", NA)
##
## BBB_3 <- c(NA, NA, 'c', NA)
## CCC_3 <- c(NA, NA, 6, NA)
## DDD_3 <- c(NA, NA, "EU", NA)
##
## output <- data.frame(AAA, BBB_1, CCC_1, DDD_1, BBB_2, CCC_2, DDD_2,
## BBB_3, CCC_3, DDD_3)
## output
##
## I've looked at melt and cast and a few other things, but none seem to do the job. |0 |4 |12 |NA |NA |
## | 76395332|16 |0 |r,google-drive-api,export,readxl |2023-06-03 09:06:50Z |12666727b9 | 14712320|1,007 |In a general folder, I have this subset of subfolders which I would like to open iteratively to download datasets in .xlsx format
## These are the subfolders that as you could see have a specific pattern name
##
## and within each of them, there is a .xlsx dataset
##
## named aas similarly as the main subfolder where it is contained
## I was wondering on extract them using some iterative function. Based on code I have found through the forum I have found something that I have readapted as for loop, but with no results.
## url = 'urlnamexxx'
## for (folder in url) {
## temp <- tempfile(fileext = ".xlsx")
## download.file(url, temp)
## readxl::read_xlsx(temp)
## }
##
## Could you please give some suggestions? |0 |8 |19 |NA |NA |
## | 76395254|57 |1 |r,vector,match,sapply |2023-06-03 08:45:50Z |Anti | 7128113|353 |I have a long numeric vector x. For every element i in that vector I'd like to know the index of the next subsequent element (in x[(i+1):length(x)] that is greater or equal than the current value in x plus a given number, and smaller than the current value x minus a given number, i.e. x[i] <= 0.5 and x[i] >= 0.5. If no element in x[(i+1):length(x)] fulfills that criterion, the code should return 1e+06.
## I'm able to solve this with a loop, but this is rather slow. Thus, I thought about using a function and apply it on the vector x. I thought about using sapply, however, with the code below I didn't succeed. What's wrong? Which other way could you suggest to return a matrix with 2 rows or 2 columns for every entry in x that returns the indices?
## Here is the code I tried:
## x <- cumsum(rnorm(1000000))
##
## fun <- function(x) {
## val <- x[1]
## x <- x[-1]
## hit.max <- which(x >= val + 0.5)
## hit.max <- ifelse(length(hit.max) > 0 , min(hit.max), 1e+06)
## hit.min <- which(x >= val - 0.5)
## hit.min <- ifelse(length(hit.min) > 0 , min(hit.min), 1e+06)
## return(c(hit.max, hit.min))
## }
##
## k <- sapply(x, fun)
##
## As requested, here is an example. As mentioned above, I'm able to solve the task with a loop (that's what I've done below):
## x <- c(5, 6, 3, 1, 4, 10, 9, 2, 7, 8)
##
## res <- matrix(nrow = 10, ncol = 2)
## for (i in 1:10) {
## hit.max <- which(x[(i + 1):length(x)] >= x[i] + 0.5)
## res[i, 1] <- ifelse(length(hit.max) > 0 , min(hit.max), length(x))
## hit.min <- which(x[(i + 1):length(x)] <= x[i] - 0.5)
## res[i, 2] <- ifelse(length(hit.min) > 0 , min(hit.min), length(x))
## }
##
## The result would look like this:
## res <- structure(c(1L, 4L, 2L, 1L, 1L, 10L, 10L, 1L, 1L, 10L, 2L, 1L,
## 1L, 10L, 3L, 1L, 1L, 10L, 10L, 10L), dim = c(10L, 2L)) |0 |0 |14 |Anti |2023-06-03 13:07:54Z |
## | 76395142|26 |0 |r,pdf,latex,title,quarto |2023-06-03 08:14:59Z |MYaseen208 | 707145|22,494 |I'm interested in including the title page (have a title page as a .png image) and back page (have a title page as a .png image) in a quarto book that will be rendered as a PDF file. I tried the following code, however, I did not get the desired output. I would like to know the steps or methods to accomplish this.
## ---
## book:
## title: "Title"
## author: "MYaseen208"
## format:
## pdf:
## documentclass: scrreprt
## toc: true
## toc-depth: 3
## include-in-header:
## - text: |
## \usepackage{lipsum}
## \usepackage{geometry}
## \geometry{a4paper, total={170mm,257mm}, left=20mm, top=20mm, bottom=20mm, right=50mm}
## ---
##
## # Introduction
##
## \lipsum[1-50]
##
## # Material and Methods
##
## \lipsum[1-20] |36 |161 |304 |MYaseen208 |2023-06-03 08:20:24Z |
## | 76395098|22 |0 |r,ggplot2,dot-plot |2023-06-03 07:58:49Z |Saleh Sarwar | 22012056|1 |I want to create such a dotplot with dotplot of clusterprofiler. Here you can see that the Ontology terms are arranged on same plot with labels at the left of the plot. I was wondering if this can be done in a way that top five ontology terms for "Molecular function", "Cellular Components" and "Biological Processes" on the same plot with "Molecular function" terms on top with it's label on the left and then "Cellular Components" top five terms and so on.
## Can anybody please help me how to do that ? The solution shouldn't necessarily use clusterprofiler, doing all this with ggplot can also be helpful.
## The desired outcome
## Edit:
## Here is an example data that I made with Chatgpt.
## top_enriched <- data.frame(
## Category = rep(c("MF", "CC", "BP"), each = 5),
## Description = c(
## "DNA binding",
## "Kinase activity",
## "Transporter activity",
## "Cytoskeleton",
## "Plasma membrane",
## "Metabolic process",
## "Cellular nitrogen compound metabolic process",
## "Protein metabolic process",
## "Cellular component organization",
## "Nucleus",
## "Signal transduction",
## "Cytoplasm",
## "Ribosome biogenesis",
## "Mitochondrial organization",
## "Cell cycle"
## ),
## p.adjust = runif(15, 0, 0.05),
## Count = sample(5:20, 15, replace = TRUE),
## GeneRatio = runif(15, 0.5, 1)
## )
##
##
##
##
## ggplot(top_enriched, aes(x = GeneRatio, y = Description, color = p.adjust, size = Count)) +
## geom_point() +
## facet_grid(Category ~ .) +
## scale_color_gradient(low = "blue", high = "red") +
## ylab("")
##
## The results of the code is shown in below image
## The result of the above code
## In my real data I have separated the top five terms for each Ontology and made a separate data frame for that. The problem is if I use Facet grid it displays the description in all of the subplot of all of the Ontology terms. I rather want it to have the description related to the specific Ontology term on the subplot. |0 |0 |0 |Saleh Sarwar |2023-06-03 18:34:38Z |
## | 76394809|20 |-1 |r,dataframe,sorting |2023-06-03 06:27:33Z |RedRageChicken | 22011703|1 |UPDATE: Q - Is there a better way of doing this?
## rows.contain.nineteen.ten <- head(summer.url.df, 30)
## Separates rows 1-30.
## #The next question requires further separating another date from the column list of dates and I came up with this...
## rows.contain.twenty.three.ten <- summer.url.df[31:55, ]
## #I need to create a variable filtering for all rows based on a numerical value of 201910.
## #Filter or select rows from within a data frame.
## read.csv("https://raw.githubusercontent.com/orban-classroom/Statistics-with-R/master/2023-spring/data/2300data.csv")
## #I see that rows [1:30] have the 201910 value, but I don't know how to ask R to separate those rows from the rest of the rows.
## #Trying to figure this out from the free Learning Statistics with R textbook. https://learningstatisticswithr.com/ |0 |0 |2 |RedRageChicken |2023-06-03 21:39:43Z |
## | 76394793|11 |0 |r,bayesian,stan,ordinal |2023-06-03 06:19:14Z |Chris S | 21991918|11 |I was wondering how one can use Stan to generate prior and posterior predictive distributions for partial proportional odds (PPO) models? I've provided code for an unconstrained PPO model below with my attempt of generating a posterior predictive distribution, not sure if it's correct, and where to start to code up a prior predictive distribution. Any help for this and the constrained model would be most helpful!
## Unconstrained PPO model Stan code:
## functions {
## // pointwise log-likelihood contributions
## vector pw_log_lik(vector alpha, vector beta, matrix tau,
## row_vector[] X, row_vector[] Z, int[] y) {
## int N = size(X);
## vector[N] out;
## int k = max(y); // assumes all possible categories are observed
## int j;
## real cj;
## real cj1;
## for (n in 1:N) {
## real eta = X[n] * beta;
## j = y[n];
## if (j == 1) cj = -( alpha[1] + eta );
## else if (j == 2) cj = alpha[1] + eta;
## else cj = alpha[j - 1] + eta + Z[n] * tau[ , j - 2];
## if(j > 1 && j < k) cj1 = alpha[j] + eta + Z[n] * tau[ , j - 1];
##
## if (j == 1 || j == k) out[n] = log_inv_logit(cj);
## // else out[n] = log(1./(1. + exp(-cj)) - 1./(1. + exp(-cj1)));
## else out[n] = log_diff_exp(-log1p_exp(- cj),
## -log1p_exp(- cj1));
## // else out[n] = log(-log1p_exp(-cj) + log1p_exp(-cj1));
## }
## return out;
## }
##
## // Pr(y == j)
## matrix Pr(vector alpha, vector beta, matrix tau,
## row_vector[] X, row_vector[] Z, int[] y) {
## int N = size(X);
## int k = max(y); // assumes all possible categories are observed
## matrix[N, k] out;
##
## for(n in 1:N) {
## real eta = X[n] * beta ;
## for(j in 1 : k) {
## real cj;
## real cj1;
## if (j == 1) cj = -( alpha[1] + eta );
## else if (j == 2) cj = alpha[1] + eta;
## else cj = alpha[j - 1] + eta + Z[n] * tau[ , j - 2];
## if(j > 1 && j < k) cj1 = alpha[j] + eta + Z[n] * tau[ , j - 1];
##
## if (j == 1 || j == k) out[n, j] = log_inv_logit(cj);
## //else out[n, j] = log(1./(1. + exp(-cj)) - 1./(1. + exp(-cj1)));
## // else out[n, j] = log_diff_exp(-log1p_exp(-cj),
## // -log1p_exp(-cj1));
## else out[n, j] = log(-log1p_exp(-cj) + log1p_exp(-cj1));
## }
## }
## return exp(out);
## }
## }
## data {
## int<lower = 1> N; // number of observations
## int<lower = 1> p; // number of predictors
## int<lower = 1> q; // number of non-PO predictors in Z
## matrix[N, p] X; // matrix of CENTERED predictors
## matrix[N, q] Z; // matrix of CENTERED PPO predictors
## int<lower = 2> k; // number of outcome categories
## int<lower = 1, upper = k> y[N]; // outcome on 1 ... k
##
## // prior standard deviations
## real<lower = 0> sds;
## real<lower = 0> sdsppo;
## real<lower = 0> conc;
## }
##
## transformed data {
## row_vector[p] Xr[N];
## row_vector[q] Zr[N];
## for (n in 1:N) Xr[n] = X[n, ];
## for (n in 1:N) Zr[n] = Z[n, ];
## }
##
## parameters {
## vector[p] beta; // coefficients on X
## matrix[q, k - 2] tau; // coefficients on Z
## simplex[k] pi; // category probabilities for a person w/ average predictors
## }
##
## transformed parameters {
## vector[k - 1] alpha; // intercepts
## vector[N] log_lik; // log-likelihood pieces
## for (j in 2:k) alpha[j - 1] = logit(sum(pi[j:k])); // predictors are CENTERED
## log_lik = pw_log_lik(alpha, beta, tau, Xr, Zr, y);
## }
##
## model {
## target += log_lik;
## target += dirichlet_lpdf(pi | rep_vector(conc, k));
## target += normal_lpdf(beta | 0, sds);
## for (j in 1:(k - 2)) target += normal_lpdf(tau[ , j] | 0, sdsppo);
## }
##
## //generated quantities {
## // vector[p] OR = -beta - tau[1,1];
## // vector[N] y_rep;
## // for (n in 1:N)
## // y_rep[n] = ordered_logistic_rng(X[n]*beta + Z[n]*tau[,1], -alpha);
## //}
##
## Constrained PPO model:
## functions {
## // pointwise log-likelihood contributions
## vector pw_log_lik(vector alpha, vector beta, vector tau, vector pposcore,
## row_vector[] X, row_vector[] Z, int[] y)
## {
## int N = size(X);
## vector[N] out;
## int k = max(y); // assumes all possible categories are observed
## real zeta = 0.;
## real r = 0.;
## real r2 = 0.;
## real ca;
## real ca1;
## int a;
## int q = size(Z) > 0 ? cols(Z[1]) : 0;
## for (n in 1:N) {
## real eta = X[n] * beta;
## if(q > 0) zeta = Z[n] * tau;
## a = y[n];
## if(q == 0) { // PO
## if (a == 1) ca = -(alpha[1] + eta);
## else if (a == 2) ca = alpha[1] + eta;
## else ca = alpha[a - 1] + eta;
## if(a > 1 && a < k) ca1 = alpha[a] + eta;
## }
## else {
## if (a == 1) ca = -( alpha[1] + eta + pposcore[2] * zeta);
## else if (a == 2) ca = alpha[1] + eta + pposcore[2] * zeta;
## else ca = alpha[a - 1] + eta + pposcore[a] * zeta;
## if(a > 1 && a < k) ca1 = alpha[a] + eta + pposcore[a+1] * zeta;
## }
## if (a == 1 || a == k) out[n] = log_inv_logit(ca);
## else out[n] = log(1./(1. + exp(-ca)) - 1./(1. + exp(-ca1)));
## // if(q > 0) r = pposcore[a] * zeta;
## // out[n] = log_inv_logit(alpha[a-1] + eta + r);
## }
## return out;
## }
## }
##
## data {
## int<lower = 1> N; // number of observations
## int<lower = 1> p; // number of predictors
## int<lower = 0> q; // number of non-PO predictors in Z
## int<lower = 2> k; // number of outcome categories
## int<lower = 0, upper = k> lpposcore; // extent of pposcore (1=PO)
## matrix[N, p] X; // matrix of CENTERED predictors
## matrix[N, q] Z; // matrix of CENTERED PPO predictors
## int<lower = 1, upper = k> y[N]; // outcome on 1 ... k
## vector[lpposcore] pposcore; // scores for constrained partial PO
##
## // prior standard deviations
## real<lower = 0> sds;
## real<lower = 0> sdsppo;
## real<lower = 0> conc;
## }
##
## transformed data {
## row_vector[p] Xr[N];
## row_vector[q] Zr[N];
## for (n in 1:N) Xr[n] = X[n, ];
## for (n in 1:N) Zr[n] = Z[n, ];
## }
##
## parameters {
## vector[p] beta; // coefficients on X
## vector[q] tau; // coefficients on Z
## simplex[k] pi; // category probabilities for a person w/ average predictors
## }
##
## transformed parameters {
## vector[k - 1] alpha; // intercepts
## vector[N] log_lik; // log-likelihood pieces
## for (j in 2:k) alpha[j - 1] = logit(sum(pi[j:k])); // predictors are CENTERED
## log_lik = pw_log_lik(alpha, beta, tau, pposcore, Xr, Zr, y);
## }
##
## model {
## target += log_lik;
## target += dirichlet_lpdf(pi | rep_vector(conc, k));
## target += normal_lpdf(beta | 0, sds);
## if(q > 0) target += normal_lpdf(tau | 0, sdsppo);
## }
##
## //generated quantities {
## // vector[p] OR1 = exp(beta + pposcore[2]*tau[1]);
## // vector[p] OR2 = exp(beta + pposcore[3]*tau[1]);
## //}
##
## As an aside, I am curious what the values pposcore and lpposcore would take when using the constrained PPO model - let's say I specify a linear relationship between the cumulative log-OR and cumulative logits, what would the values of pposcore/lpposcore take in the Stan model? Or if I specify that only the highest cumulative logit diverges from the proportional odds assumption? (these are just random examples but it will help me think about it!) |0 |0 |0 |NA |NA |
## | 76394787|15 |0 |r |2023-06-03 06:16:59Z |Sou | 21976989|11 |There seems to be a difference between levels and labels of a factor in R.
## Up to now, I always thought that levels were the 'real' name of factor levels, and labels were the names used for output (such as tables and plots). Obviously, this is not the case, as the following example shows:
##
## df <- data.frame(v=c(1,2,3),f=c('a','b','c'))
## str(df)
## 'data.frame': 3 obs. of 2 variables:
## $ v: num 1 2 3
## $ f: Factor w/ 3 levels "a","b","c": 1 2 3
##
## df$f <- factor(df$f, levels=c('a','b','c'),
## labels=c('Treatment A: XYZ','Treatment B: YZX','Treatment C: ZYX'))
## levels(df$f)
## [1] "Treatment A: XYZ" "Treatment B: YZX" "Treatment C: ZYX"
##
##
## I thought that the levels ('a','b','c') could somehow still be accessed when scripting, but this doesn't work:
##
## > df$f=='a'
## [1] FALSE FALSE FALSE
##
##
## But this does:
##
## > df$f=='Treatment A: XYZ'
## [1] TRUE FALSE FALSE
##
##
## So, my question consists of two parts:
##
## What's the difference between levels and labels?
## Is it possible to have different names for factor levels for scripting and output?
## Background: For longer scripts, scripting with short factor levels seems to be much easier. However, for reports and plots, this short factor levels may not be adequate and should be replaced with preciser names. |4 |25 |26 |divibisan |2018-09-05 17:10:09Z, 2015-10-08 21:30:30Z |
## | 76394767|5 |0 |r |2023-06-03 06:09:38Z |user22011657 | 22011657|1 |BEKK-GARCH model with external term
## I tried to use the BEKK-GARCH process with an external term, but I couldn't find any built-in function which takes into account the external term. Does anyone give me any idea about this?
## Thanks in advance |0 |0 |0 |NA |NA |
## | 76394709|29 |0 |r,reshape |2023-06-03 05:45:18Z |user257122 | 12353013|31 |This question already has answers here:
##
##
##
##
##
##
## Reshaping wide to long with multiple values columns [duplicate]
##
## (5 answers)
##
##
## Closed 15 hours ago.
##
##
##
## I need to reshape a wide formate data into long fromat, for example, to reshape this data
## dw <- read.table(header=T, text='
## sbj gender age avg.1 sd.1 avg.2 sd.2
## A 1 22 10 6 50 10
## B 1 23 12 5 70 11
## C 2 21 20 7 20 8
## D 2 20 22 8 22 9
## ')
##
## into
## ## sbj gender age var avg sd
## ## A.1 A 1 22 1 10 6
## ## B.1 B 1 23 1 12 5
## ## C.1 C 2 21 1 20 7
## ## D.1 D 2 20 1 22 8
## ## A.2 A 1 22 2 50 10
## ## B.2 B 1 23 2 70 11
## ## C.2 C 2 21 2 20 8
## ## D.2 D 2 20 2 22 9
##
## There is a similar question, see here. However, when I input the code:
## lg <- reshape(dw,
## idvar = c("sbj", "gender", "age"),
## varying = list(4:7),
## v.names = c("avg", "sd"),
## timevar = "var",
## direction = "long")
##
## It output the wrong result. I was quite confused with the document of the reshpe function in R, is there something wrong with my code? |0 |0 |4 |NA |NA |
## | 76394485|53 |1 |r,dplyr,tidy |2023-06-03 03:49:08Z |Fadhil Dzikri | 21339125|113 |i want to grouping the data but there have a rules
## ex my df is
## df <- data.frame(NAME = c("ALEX", "ALEX", "ALEX", "BRODI", "BRODI", "BRODI"),
## CLASS = c("A", "A", "A", "B", "B", "B"),
## TIME = c(1213, 1314, 1415, 1213, 1415, 1617),
## SALARY = c(100, 200, 300, 100, 200, 300))
##
## the time is starttime to endtime and with char type data
## i want to group by name,class and time to summarize the salary but the time will be group if there are equal start and end time (ex 1213,1314,1415) will be 1 group to (1215) and salary will be summarize and (1213,1415,1617) can't be group because the time isn't continue.
## my expected df is
## df2 <- data.frame(NAME = c("ALEX", "BRODI", "BRODI", "BRODI"),
## CLASS = c("A", "B", "B", "B"),
## TIME = c(1215, 1213, 1415, 1617),
## SALARY = c(600, 100, 200, 300)) |0 |0 |5 |jpsmith |2023-06-03 03:53:24Z, 2023-06-03 04:50:43Z |
## | 76394461|50 |0 |r,geosphere |2023-06-03 03:32:20Z |stats_noob | 13203841|5,117 |I am working with the R programming language.
## I have the following data frames:
## set.seed(123)
##
## df_1 <- data.frame(
## name_1 = c("john", "david", "alex", "kevin", "trevor", "xavier", "tom", "michael", "troy", "kelly", "chris", "henry", "taylor", "ryan", "peter"),
## lon = rnorm(15, mean = -74.0060, sd = 0.01),
## lat = rnorm(15, mean = 40.7128, sd = 0.01)
## )
##
## df_2 <- data.frame(
## name_2 = c("matthew", "tyler", "sebastian", "julie", "anna", "tim", "david", "nigel", "sarah", "steph", "sylvia", "boris", "theo", "malcolm"),
## lon = rnorm(14, mean = -74.0060, sd = 0.01),
## lat = rnorm(14, mean = 40.7128, sd = 0.01)
## )
##
## My Problem: I want to find out the distance between all people from df_1 and df_2 and then perform some summary statistics (e.g. for each person in df_1 - what is the distance to the closest person df_2, furthest person in df_2, average person in df_2, etc.)
## I did this myself like this:
## library(geosphere)
##
## haversine_distance <- function(lon1, lat1, lon2, lat2) {
## distHaversine(c(lon1, lat1), c(lon2, lat2))
## }
##
##
## distances <- matrix(nrow = nrow(df_1), ncol = nrow(df_2))
## # calculate the distances
## for (i in 1:nrow(df_1)) {
## for (j in 1:nrow(df_2)) {
## distances[i, j] <- haversine_distance(df_1$lon[i], df_1$lat[i], df_2$lon[j], df_2$lat[j])
## }
## }
##
##
##
## final <- data.frame(
## name_1 = rep(df_1$name_1, each = nrow(df_2)),
## lon_1 = rep(df_1$lon, each = nrow(df_2)),
## lat_1 = rep(df_1$lat, each = nrow(df_2)),
## name_2 = rep(df_2$name_2, nrow(df_1)),
## lon_2 = rep(df_2$lon, nrow(df_1)),
## lat_2 = rep(df_2$lat, nrow(df_1)),
## distance = c(distances)
## )
##
## final_summary <- aggregate(distance ~ name_1,
## data = final,
## FUN = function(x) c(min = min(x),
## max = max(x),
## mean = mean(x),
## median = median(x),
## sd = sd(x)))
## final_summary <- do.call(data.frame, final_summary)
## names(final_summary)[-(1)] <- c("min_distance", "max_distance", "mean_distance", "median_distance", "sd_distance")
##
## In another question (R: Improving the Speed of Pairwise Calculations), I learned how to do the same thing but in a more efficient way (I made some modifications):
## haversine_distance <- function(lon1, lat1, lon2, lat2) {
## distHaversine(cbind(lon1, lat1), cbind(lon2, lat2))
## }
##
## # calculate the distances
## distances <- expand.grid(i = seq_len(nrow(df_1)), j = seq_len(nrow(df_2)))
## distances$dist <- haversine_distance(
## df_1$lon[distances$i],
## df_1$lat[distances$i],
## df_2$lon[distances$j],
## df_2$lat[distances$j]
## )
##
## final <- data.frame(
## name_1 = df_1$name_1[distances$i],
## lon_1 = df_1$lon[distances$i],
## lat_1 = df_1$lat[distances$i],
## name_2 = df_2$name_2[distances$j],
## lon_2 = df_2$lon[distances$j],
## lat_2 = df_2$lat[distances$j],
## distance = distances$dist
## )
##
## final_summary1 <- aggregate(
## distance ~ name_1,
## data = final,
## FUN = function(x) c(min = min(x), max = max(x), mean = mean(x), median = median(x), sd = sd(x))
## )
## final_summary1 <- do.call(data.frame, final_summary1)
## names(final_summary1)[-(1)] <- c("min_distance", "max_distance", "mean_distance", "median_distance", "sd_distance")
##
## My Question: When I compare the results from both methods on the same data - apparently they are not the same:
## > identical(final_summary, final_summary1)
## [1] FALSE
##
## Can someone please help me understand why these two outputs are not the same and how can I make them the same? Are both approaches correct?
## Thanks! |3 |21 |72 |Dave2e |2023-06-03 12:52:35Z |
## | 76394455|18 |0 |r,shiny |2023-06-03 03:30:15Z |yefersonG | 14376017|151 |I am working on a function in R using Shiny Survey. Here is the code that I have so far.
## # Load packages
## library(shiny)
## library(shinysurveys)
## library(shiny.semantic)
##
## # Register a "check" input type
## extendInputType("check", function() {
## shiny::checkboxGroupInput(
## inputId = surveyID(),
## label = surveyLabel(),
## choices = surveyOptions()
## )
## })
##
## # Define question in the format of a shinysurvey
## ice_cream_question <- data.frame(
## question = "Elija el software donde se trabajó este script.",
## option = c("Nfield", "Dimension", "Survey to Go","Question Pro"),
## input_type = "select",
## input_id = "favorite_ice_cream",
## dependence = NA,
## dependence_value = NA,
## required = TRUE
## )
##
## # Define shiny UI
## ui <- fluidPage(
## surveyOutput(df = ice_cream_question,
## survey_title = "Procesamiento Mercaplan",
## theme = 'mercaplan'
## )
## )
##
## # Define shiny server
## server <- function(input, output, session) {
## survey_env <- new.env() # Crear y inicializar el objeto survey_env
##
## # Observar cambios en la selección de tema y actualizar el tema en el entorno survey_env
## observe({
## survey_env$theme <- ifelse(input$theme == "mercaplan", "mercaplan", survey_env$theme)
## })
##
## renderSurvey() # Renderizar la encuesta en la interfaz de usuario
##
## # Observar el evento de envío de la encuesta
## observeEvent(input$submit, {
## response_data <- getSurveyData() # Obtener los datos de la encuesta
## print(response_data) # Imprimir los datos de la encuesta en la consola
## })
## }
##
## #' Aplicación Shiny para una encuesta interactiva sobre el software utilizado en un script.
## #'
## #' Esta aplicación utiliza el paquete "shinysurveys" para crear una encuesta interactiva en Shiny.
## #' El usuario puede seleccionar el software utilizado en un script de una lista de opciones.
## #'
## #' @importFrom shiny shinyApp fluidPage
## #' @importFrom shinysurveys extendInputType surveyOutput renderSurvey getSurveyData
## #' @importFrom shiny.semantic surveyLabel surveyOptions
## #' @return None
## #' @examples pantalla
## #' @export pantalla
## #'
## pantalla <- function() {
## shinyApp(ui, server)
## }
##
## # Ejecutar la aplicación Shiny
## return(pantalla())
##
##
## Now I am trying to create the package with the following function in order to generate the .Rd file, but the following issue occurs.
## devtools::load_all()
##
## Loading procesamientomerca
## Input Type "check" registered with {shinysurveys}. If the session restarts, you will need to re-register it.
## To see all registered input extensions, please call shinysurveys::listInputExtensions().
## Warning messages:
## 1: object 'surveyLabel' is not exported by 'namespace:shiny.semantic'
## 2: object 'surveyOptions' is not exported by 'namespace:shiny.semantic'
## 3: Objects listed as exports, but not present in namespace:
## • app
## I hope you can help me because this Shiny app needs to be composed of other functions. |0 |0 |8 |NA |NA |
## | 76394451|22 |0 |r,string,substr |2023-06-03 03:27:56Z |Adrian | 3391549|9,275 |mystring <- c("code IS (384333)\n AND parse = TURE \n ) \n
## \n code IS (43343344)\n ) some information here
## code IS (23423422) ) and more information")
##
## I'm trying to replace the 2nd to 40th characters of mystring with "HELLO". However, sub is not giving me the correct output:
## > sub(pattern = substr(mystring, 2, 40), replacement = "HELLO", mystring)
## [1] "code IS (384333)\n AND parse = TURE \n ) \n \n \n code IS (43343344)\n ) some information here\n code IS (23423422) ) and more information"
##
## The correct output is
## [1] "cHELLO \n \n \n code IS (43343344)\n ) some information here\n code IS (23423422) ) and more information" |24 |73 |131 |NA |NA |
## | 76394390|34 |2 |r,string,substr |2023-06-03 03:00:13Z |Adrian | 3391549|9,275 |string <- "this is a funny cat"
##
## I want to replace the first 15 characters of string with 'orange`. The desired output is
## 'orange cat'
##
## However, using substr gives me
## substr(string, 1, 15) <- "orange"
## > string
## [1] "oranges a funny cat"
##
## which is not the desired output. |24 |73 |131 |Adrian |2023-06-03 03:11:24Z |
## | 76394371|19 |0 |r |2023-06-03 02:49:56Z |Aztec22 | 10724981|31 |I have a data frame called df that contains 2 columns; user and artist.
## head(df, 5)
## user artist
## 1 3bd73256-3905-4f3a-97e2-8b341527f805 betty blowtorch
## 2 f2fb0ff0-5679-42ec-a55c-15109ce6e320 die Ärzte
## 3 b3ae82c2-e60b-4551-a76d-6620f1b456aa melissa etheridge
## 4 3d6bbeb7-f90e-4d10-b440-e153c0d10b53 elvenking
## 5 bbd2ffd7-17f4-4506-8572-c1ea58c3f9a8 juliette & the licks
##
## I want to create another data frame that gives each entry in the artist column a row that is then represented by a 0 or 1 based on if the user ID has an entry for that artist. Here is an example of the results I am trying to get:
## user betty blowtorch die Ärzte melissa etheridge elvenking juliette & the licks
## 1 3bd73256-3905-4f3a-97e2-8b341527f805 1 0 0 0 0
## 2 3bd73256-3905-4f3a-97e2-8b341527f805 0 1 0 0 0
## 3 f2fb0ff0-5679-42ec-a55c-15109ce6e320 0 1 0 0 0
## 4 b3ae82c2-e60b-4551-a76d-6620f1b456aa 0 0 1 0 0
## 5 3d6bbeb7-f90e-4d10-b440-e153c0d10b53 0 0 0 1 0
## 6 bbd2ffd7-17f4-4506-8572-c1ea58c3f9a8 0 0 0 0 1
## 7 bbd2ffd7-17f4-4506-8572-c1ea58c3f9a8 0 0 0 1 0
##
## If the user ID appears in df multiple times, then I need a new row for that user, so each row will only have one 1 value (see above example).
## Any suggestions? |0 |0 |7 |Phil |2023-06-03 03:13:45Z, 2023-06-03 03:19:39Z |
## | 76394359|16 |0 |r,spatial,covariance,temporal |2023-06-03 02:42:40Z |EM823823 | 14336849|121 |I'd like to simulate a Gaussian process with mean zero and a non-separable Gneiting correlation function. The R-code for this function is:
## Gneiting<-function(h,u, al = 0.75, s2 = 0.968, a=0.5,c=1,beta)
## {
## part1=a*abs(u)^(2*al)+1;
## denom=part1^(beta/2);
## part2=exp(-c*h/denom);
## return(s2*part2/part1);
## }
##
##
## To simulate the process with this correlation function I do:
## library(fields)
## time <- 1:6
## loc <- cbind(expand.grid(seq(-83, -74, len = 16), seq(30, 36, len = 16)))
## locdist <- rdist.earth(loc)
## #spatial locations
## ns = dim(loc)[1]
## #time points
## nt = 6
## #Compute the Covariance
## Sigma=matrix(0,ns*nt,ns*nt)
## for(space1 in 1:ns){
## for(space2 in 1:ns)
## {
## hlag=locdist[space1,space2]
## for(time1 in 1:nt)
## {
## for(time2 in 1:nt)
## {
## ulag=time2 - time1
## index1=(time1-1)*ns+space1;
## index2=(time2-1)*ns+space2;
## Sigma[index1,index2]=Gneiting(h = hlag, u=ulag, beta = .6)
## }
## }
## }
## }
##
## #Simulate the covariance
## set.seed(123)
## xx.gneiting <- crossprod(chol(Sigma), rep(rnorm(nrow(loc) * 6)))
##
## However, from the level plot the resulting process doesn't seem to have any spatio-temporal structure:
## simdf.gneiting <- data.frame(lon = rep(loc[,1], each = 6),
## lat = rep(loc[,2], each = 6),
## dat= xx.gneiting,
## time = rep(1:6, nrow(loc)) )
##
## # plot
## lattice::levelplot(dat~ lon + lat|time, data = simdf.gneiting)
##
## I was wondering if my correlation setup is properly done, or could I visualize my process differently than a levelplot to get a better idea about the spatio-temporal structure? |0 |0 |5 |NA |NA |
## | 76394250|11 |0 |r,linear-regression,least-squares,multicollinearity,variance-inflation-factor |2023-06-03 01:37:18Z |José Amorim | 22010904|1 |I am using the GVIF method in my analyses to check for multicollinearity of my variables using the 'car' package in R, but some of the GVIF values are negative, so GVIF^(1/(2df)) values can't be calculated.
## How can I fix this? Or is it correct to use these negative GVIF values to calculate (GVIF^(1/(2df)))²? |0 |0 |0 |NA |NA |
## | 76394080|27 |0 |r,categorical-data,unmarked-package |2023-06-03 00:06:53Z |AllyDavidge | 22010681|1 |I am fairly new to unmarked as a package, but I'm trying to estimate the occupancy of Cooper's Hawks across Denver considering a list of covariates. I've found that percent tree canopy cover (PCT_CAN) and redlining grade (GRADE) out compete the other covariates I've considered in an AICc table and now I'm trying to create partial derivative xy plots of the estimates I get for occupancy for each of these within their own models. I'm having a difficult time creating predictions using my unmarked dataframe because R can't process an estimate around the categorical GRADE. I've included a screenshot of my unmarkedFrame. I need to 1) identify how to separate out GRADE and correctly process it because I'll need to create occupancy estimates for that covariate as well and 2) figure out the code for developing these xy plots with a 95% confidence interval envelope around it. Happy to provide more info, I'm not entirely sure what would be most useful.
## Here's what I usually get back when I try to run this:
## occu_modavg_psi_predict_df <- data.frame(Predicted = occu_modavg_psi_predict$mod.avg.pred,
## + lower = occu_modavg_psi_predict$lower.CL,
## + upper = occu_modavg_psi_predict$upper.CL,
## + siteCovs)
## > head(occu_modavg_psi_predict_df)
## > occu_PCTCAN_newdata <- data.frame(PCT_CAN = seq(min(siteCovs$PCT_CAN),
## + max(siteCovs$PCT_CAN), by = 0.5),
## + Pct_Built = mean(siteCovs$Pct_Built), # hold other variables constant
## + NDVI = mean(siteCovs$NDVI),
## + NN = mean(siteCovs$NN))
## > # Model-averaged prediction of occupancy and confidence interval
## > occu_PCTCAN_pred <- modavgPred(occu_model_list,
## + # c.hat = # to change variance inflation factor, default = 1)
## + parm.type = "psi", # psi = occupancy
## + newdata = occu_PCTCAN_newdata)[c("mod.avg.pred",
## + "lower.CL",
## + "upper.CL")]
## Error in eval(predvars, data, env) : object 'GRADE' not found
## > occu_PCTCAN_newdata <- data.frame(PCT_CAN = seq(min(siteCovs$PCT_CAN),
## + max(siteCovs$PCT_CAN), by = 0.5),
## + Pct_Built = mean(siteCovs$Pct_Built), # hold other variables constant
## + NDVI = mean(siteCovs$NDVI),
## + NN = mean(siteCovs$NN),
## + GRADE= mean(siteCovs$GRADE))
## Warning: argument is not numeric or logical: returning NA
## > occu_PCTCAN_pred <- modavgPred(occu_model_list,
## + # c.hat = # to change variance inflation factor, default = 1)
## + parm.type = "psi", # psi = occupancy
## + newdata = occu_PCTCAN_newdata)[c("mod.avg.pred",
## + "lower.CL",
## + "upper.CL")]
## Warning: variable 'GRADE' is not a factorError in .local(obj, coefficients, ...) :
## ncol(coefficients) == length(est) is not TRUE |0 |0 |0 |Jon Spring |2023-06-03 00:28:59Z |
## | 76394018|30 |0 |r,statistics,bayesian,jags,rjags |2023-06-02 23:40:49Z |0yl3r | 15738464|1 |I am new to Bayesian statistics and I have been trying to write up a logistic model for JAGS in R.
## I have defined the priors and the likelihood accordingly. However. I keep getting an error "Node Inconsistent with parents". I have 6 categorical variables and 4 Numerical variables. Any advise would be much appreciated. Thank you.
## Code:
## model = "model{
## # Priors
## beta1~dnorm(0,1/100^2);beta2~dunif(0,1);
## beta3~dnorm(64.4,1/100^2);beta4~dunif(0,1);
## beta5~dunif(0,1);beta6~dunif(0,1);
## beta7~dunif(0,1);beta8~dnorm(0.40,1/100^2);
## beta9~dnorm(0.64,1/100^2);beta10~dunif(0,1);
##
## # Likelihood
## for(i in 1:N) {
## p[i] = 1/(1 + exp(-(beta1
## + beta2*genderMale[i]
## + beta3*age[i]
## + beta4*hypertension1[i]
## + beta5*heart_disease1[i]
## + beta6*ever_marriedYes[i]
## + beta7*Residence_typeUrban[i]
## + beta8*avg_glucose_level[i]
## + beta9*bmi[i]
## + beta10*smoking_statussmokes[i])));
## Y[i] ~ dbern(p[i])
## }
## }"
##
## data$gender <- as.factor(data$gender)
## data$hypertension <- as.factor(data$hypertension)
## data$heart_disease <- as.factor(data$heart_disease)
## data$Residence_type <- as.factor(data$Residence_type)
## data$smoking_status <- as.factor(data$smoking_status)
## data$work_type <- as.factor(data$work_type)
##
## X <- model.matrix(stroke ~ gender + age + hypertension + heart_disease + ever_married + Residence_type + avg_glucose_level + bmi + smoking_status, data=data) # Design matrix
## Y <- data$stroke;
##
## library("rjags");library(coda);
## set.seed(42)
##
## data = list(Y = Y, genderMale = X[,"genderMale"], age = X[,"age"], hypertension1 = X[,"hypertension1"], heart_disease1 = X[,"heart_disease1"], ever_marriedYes = X[,"ever_marriedYes"], Residence_typeUrban = X[,"Residence_typeUrban"], avg_glucose_level = X[,"avg_glucose_level"], bmi = X[,"bmi"], smoking_statussmokes = X[,"smoking_statussmokes"], N =length(Y));
## variable_names = c("beta1","beta2","beta3","beta4","beta5","beta6","beta7","beta8","beta9","beta10","p");
##
## fileConn=file("model.temp");
## writeLines(model, fileConn);
## close(fileConn);
## m = jags.model(file="model.temp", data=data)
##
##
## draw = jags.samples(m, 1000, thin=1, variable.names = variable_names)
## # Convert to a list
## make_list <- function(draw)
## {
## results = list()
## for(name in names(draw))
## {
## # Extract "chain 1"
## results[[name]] = as.array(draw[[name]][,,1])
##
## # Transpose 2D arrays
## if(length(dim(results[[name]])) == 2)
## results[[name]] = t(results[[name]])
## }
## return(results)
## } |0 |0 |0 |NA |NA |
## | 76393934|35 |0 |r,dataframe,function,dplyr,multiple-columns |2023-06-02 23:11:25Z |Sylvia Rodriguez | 11478372|1,193 |I would like to create a new column in a data.frame with the values of one column but using the sign (+ or -; <0 or >0) added. Here is an example.
## set.seed(12345)
## df = data.frame(a=c(rep("a",8), rep("b",5), rep("c",7), rep("d",10)),
## b=rnorm(30, 0, 2),
## c=rnorm(30, 12, 3.5),
## d=rnorm(30, 8, 3)
## )
##
## How can I create a column d_signed, which has each value of column d, but with the sign of column b? Expected result:
## df$d_signed <- c(8.448776, 3.972406, -9.659909, -12.769889, 6.239361, -2.502868, 10.664418,
## -12.780465, -9.550564, -4.112985, -8.163847, 5.646052, 4.851942, 14.991536,
## -12.208116, 10.827803, -10.478775, -5.565379, 9.428745, 11.063775, 9.936149,
## 11.129431, -7.086893, -15.431333, -10.913662, 13.601298, -10.016127, 7.076140,
## 9.609571, -10.474610)
##
## Thank you. |0 |10 |26 |NA |NA |
## | 76393814|32 |1 |r |2023-06-02 22:35:33Z |TJ87 | 9120273|372 |My question is a variation on Complete missing dates based on start and end. I would like to fill in missing date ranges using a floor date and ceiling date, but the date ranges in my example should be non-overlapping.
## have <- data.frame(ID = c(1, 1),
## date1 = as.Date(c("2000-01-29", "2000-07-16")),
## date2 = as.Date(c("2000-07-15", "2001-04-30")))
##
## > have
## ID date1 date2
## 1 1 2000-01-29 2000-07-15
## 2 1 2000-07-16 2001-04-30
##
## want <- data.frame(ID = c(1, 1),
## date1 = as.Date(c("2000-01-01", "2000-01-29", "2000-07-16", "2001-05-01")),
## date2 = as.Date(c("2000-01-28", "2000-07-15", "2001-04-30", "2001-07-31")))
##
## > want
## ID date1 date2
## 1 1 2000-01-01 2000-01-28
## 2 1 2000-01-29 2000-07-15
## 3 1 2000-07-16 2001-04-30
## 4 1 2001-05-01 2001-07-31
##
## Using the answer to the previous post nearly works, except that date2 and the following date1 overlap.
## library(dplyr)
## start <- as.Date("2000-01-01"); end <- as.Date("2001-07-31")
##
## ex <- have %>%
## group_by(ID) %>%
## summarize(newdate1 = c(start, date2), newdate2 = c(date1, end)) %>%
## rename(date1 = newdate1, date2 = newdate2) %>%
## filter(date2 > date1) %>%
## ungroup() %>%
## bind_rows(have) %>%
## arrange(ID, date1)
##
## > ex
## # A tibble: 5 Ã 3
## ID date1 date2
## <dbl> <date> <date>
## 1 1 2000-01-01 2000-01-29
## 2 1 2000-01-29 2000-07-15
## 3 1 2000-07-15 2000-07-16
## 4 1 2000-07-16 2001-04-30
## 5 1 2001-04-30 2001-07-31
##
## Thank you. |0 |2 |13 |NA |NA |
## | 76393742|24 |2 |r,machine-learning,party |2023-06-02 22:16:41Z |Remy M | 7605919|577 |I am trying to use the MOB procedure from the R package partykit to predict survival probabilities based on a set of covariates X1,...,X25 and a treatment effect W. The linear predictor in each node in MOB only uses W, X1, and X2, but each covariate is used for selection for node splitting. I would like to force the MOB to only split according to parameter instability for the treatment effect W. When doing prediction in the final line of code below, I get the following error:
## Error in rval[ix[[i]], ] <- preds[[i]] :
## number of items to replace is not a multiple of replacement length
## In addition: Warning messages:
## 1: 'newdata' had 1070 rows but variables found have 1029 rows
## 2: 'newdata' had 1337 rows but variables found have 1291 rows
## 3: 'newdata' had 1690 rows but variables found have 1680 rows
## 4: 'newdata' had 903 rows but variables found have 1000 rows
##
## I believe this error occurs because the number of test observations falling in each terminal node is different than that of the training observations. How can I modify the predict statement to handle this issue and obtain predictions on the test set? I would also like to know if I'm using the parm option correctly in specifying that parameter instability should be assessed according to W.
## library("survival")
## library("partykit")
##
## n=5000;n.test=5000;p=25;pi=0.5;beta=1
## gamma=0.5;rho=2;cen.scale=4;n.mc=10000;
## Y.max=2
##
## generate_data <- function(n, p, pi = 0.5, beta = 1, gamma = 1, rho = 2, cen.scale = 4,
## Y.max = NULL){
## W <- rbinom(n, 1, pi)
## X <- matrix(rnorm(n * p), n, p)
## numerator <- -log(runif(n))
## cox.ft <- (numerator / exp(beta * X[ ,1] + (-0.5 - gamma * X[ ,2]) * W))^2
## failure.time <- pmin(cox.ft, Y.max)
## numeratorC <- -log(runif(n))
## censor.time <- (numeratorC / (cen.scale ^ rho)) ^ (1 / rho)
## Y <- pmin(failure.time, censor.time)
## D <- as.integer(failure.time <= censor.time)
## list(X = X, Y = Y, W = W, D = D)
## }
##
## data <- generate_data(n, p=p, pi = pi, beta = beta, gamma = gamma, rho = rho, cen.scale = cen.scale,
## Y.max = Y.max)
##
## data.test <- generate_data(n.test, p=p, pi = pi, beta = beta, gamma = gamma, rho = rho, cen.scale = cen.scale,
## Y.max = Y.max)
##
## X=data$X
## Y=data$Y
## W=data$W
## D=data$D
##
## var_prog <- c("X1","X2")
## colnames(X) <- paste("X", 1:25, sep="")
## cov.names <- colnames(X)
##
## wbreg <- function(y, x, start = NULL, weights = NULL, offset = NULL, ...) {
## survreg(y ~ 0 + x, weights = weights, dist = "weibull", ...)
## }
##
## dat <- data.frame(Y=Y,D=D,W=W,X)
##
## eqn <- paste0("Surv(Y, D) ~ W + ",paste0(var_prog, collapse = "+")," | ",
## paste0(cov.names, collapse = "+"))
##
## glmtr <- partykit::mob(as.formula(eqn), data = dat,
## fit = wbreg, control = mob_control(parm=2,minsize = 0.2*nrow(dat),
## alpha = 0.10, bonferroni = TRUE))
##
## plot(glmtr)
##
## dat.test <- data.frame(Y=data.test$Y,D=data.test$D, W=data.test$W,data.test$X)
## pct <- 1:98/100
## quantile_pred <- predict(glmtr, newdata = dat.test, type = "quantile",p=pct) |0 |4 |17 |desertnaut |2023-06-03 12:00:46Z |
## | 76393686|35 |1 |r,macos,rstudio |2023-06-02 22:03:46Z |abraham | 9097452|659 |I know that this question is available in this page, but I have tried most of them, and I just canât make it works. How can I set an external program as blast permanently using Sys.setenv for my personal functions in R
## Iâm using a MacOS with R4.1.1 and RStudio 2021.09.0.
## The common way that I usually call an external program, in this case BLAST (ncbi-blast-2.2.29+) is just copy and paste the line in RStudio console, something like:
## Sys.setenv(PATH = paste(Sys.getenv("PATH"), "/Users/myname/myprograms/ncbi-blast-2.2.29+/bin/", sep= .Platform$path.sep))
##
## And then I just run my personal functions that use blast, and I have no problem with that. But I just want to set blast permanently in RStudio, and donât need to copy and paste that line every time that I start RStudio. So I tried to paste that line in .Renviron, and then start up Rstudio and tried to run blast functions, and it didnât work !!!
## So I tried different, add the path in .zshrc (Iâm using /bin/zsh), using nano or sublime text:
## nano ~/.zshrc
## export BLAST_PATH=$PATH:~/genetools/ncbi-blast-2.2.29+/bin/
##
## And then just add the BLAST_PATH to Sys.setenv in .Renviron
## nano .Renviron
##
## Sys.setenv(PATH = paste(Sys.getenv("PATH"), "BLAST_PATH", sep= .Platform$path.sep))
##
## or
## Sys.setenv(PATH = paste(Sys.getenv("PATH"), BLAST_PATH, sep= .Platform$path.sep))
##
## And restart RStudio and just it didnât work when I use my personal functions !!!
## Any way to set it permanently in RStudio ?
## By the way, My .Renviron is located at home (~/.Renviron) and my working directory (R sessions ) in RStudio is located in ~/R !!!
## Thanks |0 |8 |13 |Konrad Rudolph |2023-06-03 10:04:50Z |
## | 76393636|33 |0 |r,logistic-regression,roc,auc |2023-06-02 21:47:50Z |Jason Connelly | 21476994|1 |I am running a binary LASSO logistic regression using glmnet. The initial data I work with is raster spatial data. When I create an ROC (AUC ~ 0.72) curve based on the test data, the resulting curve appears to curve early and has a very strange shape (shown below).
## Does anyone know how I can interpret this curve and apply changes to my model to improve it?
##
## When I generate the ROC using the training data (AUC ~ 0.93), it does not appear like this.
## Also, when I run the same script on coarser resolution data (30 m as opposed to the 5 m currently being used), AUC curves on training and testing data look as expected (AUC's of ~0.94 and 0.90). |0 |0 |0 |Calimo |2023-06-03 06:13:47Z |
## | 76393577|23 |0 |r,for-loop,matrix,dplyr,set-difference |2023-06-02 21:33:40Z |moriahtaylor | 15127026|1 |I'm trying to take the row-wise set difference of two different list columns. So, line 1 column A would be a list of IDs for one year (m_lag), and line 2 column A would be a different list of IDs for the next year (m), and then finding the length of those set differences.
## I've tried both dplyr techniques and using a for loop. Some example code below that I've tried so far:
## for (i in 1:nrow(test)){
## m <- test[i,3][1]
## m_lag <- test[i,5][1]
## names(m_lag) <- "m"
##
## Diff1 <- setdiff(m_lag, m)
## Diff2 <- setdiff(m, m_lag)
##
## L1 <- length(Diff1)
## L2 <- length(Diff2)
##
## }
##
## test %>% mutate(Diff1 = setdiff(m_lag, m), Diff2 = setdiff(m, m_lag))
##
## test %>% mutate(Diff1 = map2(m_lag, m, setdiff), Diff2 = map2(m, m_lag, setdiff))
##
##
## I've also tried different variations of indexing, such as m[[1]] instead of m, and in the for loop using nrow() in place of length().
## What I need at the end (eventually) is the length of each difference represented as an integer. |0 |0 |0 |NA |NA |
## | 76393401|34 |1 |r,ggplot2,geom-col |2023-06-02 20:49:40Z |tatertot2 | 22010120|11 |This question already has answers here:
##
##
##
##
##
##
## Order discrete x scale by frequency/value
##
## (7 answers)
##
##
##
## How do you specifically order ggplot2 x axis instead of alphabetical order? [duplicate]
##
## (2 answers)
##
##
##
## ggplot2, Ordering y axis
##
## (1 answer)
##
##
##
## R ggplot ordering bars within groups
##
## (1 answer)
##
##
## Closed yesterday.
##
##
##
## I am attempting to switch the plotted bars so the first column is for Budgeted and the second column in for Actual. Any help is much appreciated.
##
## This is my code.
## library(tidyverse)
## library(ggplot2)
## library(dplyr)
## library(scales)
##
## Mississppi <-as.data.frame(R_test_8)
## head(Mississppi)
##
##
## ggplot() + geom_col(data = Mississppi, aes( x = Ship, y = Mandays , fill = Type), position = "dodge") + geom_point() +
## scale_y_continuous(labels = label_number (suffix = " K", scale = 1e-3), limits = c(0, 400000), breaks = seq(0, 400000, by = 50000)) + geom_hline(yintercept = 200000, color = "#7c7d81", linetype = 2) + scale_fill_manual(values = c("#6b9bb1", "#102031")) + geom_point() +
## theme_classic() + theme(axis.title.x = element_blank(), axis.title.y = element_blank()) + ggtitle("Days of Labor (Thousands)") |0 |0 |2 |Peter |2023-06-02 21:07:47Z |
## | 76393378|34 |0 |r,regex,string |2023-06-02 20:45:29Z |Adrian | 3391549|9,275 |This question already has answers here:
##
##
##
##
##
##
## Replace Nth occurrence of a character in a string with something else
##
## (5 answers)
##
##
##
## how to replace nth character of a string in a column in r
##
## (3 answers)
##
##
## Closed yesterday.
## The community is reviewing whether to reopen this question as of 20 hours ago.
##
##
##
## mystring <- c("code IS (384333)\n AND parse = TURE \n ) \n
## \n code IS (43343344)\n ) some information here
## code IS (23423422) ) and more information")
##
## > gsub(pattern = 'code IS \\([0-9]+\\)[\\s\\S]+?\\)', replacement = "HELLO", mystring, perl = TRUE)
## [1] "HELLO \n \n \n HELLO some information here\n HELLO and more information"
##
## In the above example, using gsub replaced all occurrences of the pattern with HELLO. What if I want to just replace the 2nd occurrence (i.e., ind_replacement = 2)? The desired output is:
## [1] "code IS (384333)\n AND parse = TURE \n ) \n \n \n HELLO some information here\n code IS (23423422) ) and more information"
##
## Now, what about replacing the 2nd and 3rd occurrences (i.e., ind_replacement = c(2, 3))? The desired output is:
## [1] "code IS (384333)\n AND parse = TURE \n ) \n \n \n HELLO some information here\n HELLO and more information"
##
## Is there a way to write the code in a generalizable way that involves ind_replacement such that I'm only replacing multiple occurrences of a pattern?
## Edit:
## I've looked at the other suggested posts, but they do not answer my question. Specifically, I want to replace MULTIPLE occurrences of a pattern. For example, the first and third occurrences, or the second and third together.
## Trying something like the following to replace the 2nd and 3rd occurrences together does not work as intended:
## > gsub(pattern = 'code IS \\([0-9]+\\)[\\s\\S]+?\\){2-3}', replacement = "HELLO", mystring, perl = TRUE)
## [1] "code IS (384333)\n AND parse = TURE \n ) \n \n \n code IS (43343344)\n ) some information here\n code IS (23423422) ) and more information" |24 |73 |131 |Adrian |2023-06-03 02:22:43Z |
## | 76393250|11 |0 |r,shiny,shinydashboard,bs4dash,bslib |2023-06-02 20:19:08Z |bikeactuary | 4954719|455 |I am trying to adapt for my own application the last example in this documentation of creating valueBoxes with "showcase"d sparklines made interactive via plotly. The example does not go as far as rendering within a shiny app and the bslib package does not include render/output functions.
## I have gotten something sort of working via the renderUI/uiOutput functions but the result does not respect proportioning and positioning between the value and the showcased sparkline within the bs4Dash (or any other framework). Running the code interactively in RStudio shows the desired result in the Viewer pane. I am looking for help to match the rendered output in my shiny app to the article linked above
## MRE below
## ## app.R ##
## library(shiny)
## library(plotly)
## library(dplyr)
## library(bs4Dash)
## library(bslib)
##
##
## ui <- bs4DashPage(
## dashboardHeader(title = "Test Dash"),
## bs4DashSidebar(
## sidebarMenu(id = "tab",
## menuItem("Test 1", tabName = "t1", icon = icon("dashboard")),
## menuItem("Test 2", tabName = "t2", icon = icon("triangle-exclamation"))
## )
## ),
## bs4DashBody(
## tabItems(
## tabItem(tabName = "t1",
##
## fluidRow(
## box(width = 3,
## uiOutput("papq_vbox_quote")
## )
## )
## ),
## tabItem(tabName = "t2"
## )
## )
## )
## )
##
## server <- function(input, output) {
##
## dat <- tibble(Date = seq(Sys.Date()-59, Sys.Date(), by = 1),
## measure = rnorm(length(Date), 20 + (Date - min(Date)), 5))
##
## output$papq_vbox_quote <- renderUI({
##
## sparkline <- plot_ly(dat) %>%
## add_lines(
## x = ~Date, y = ~measure,
## color = I("white"), span = I(1),
## fill = 'tozeroy', alpha = 0.2
## ) %>%
## layout(
## xaxis = list(visible = F, showgrid = F, title = ""),
## yaxis = list(visible = F, showgrid = F, title = ""),
## hovermode = "x",
## margin = list(t = 0, r = 0, l = 0, b = 0),
## font = list(color = "white"),
## paper_bgcolor = "transparent",
## plot_bgcolor = "transparent"
## ) %>%
## config(displayModeBar = F) %>%
## htmlwidgets::onRender(
## "function(el) {
## var ro = new ResizeObserver(function() {
## var visible = el.offsetHeight > 200;
## Plotly.relayout(el, {'xaxis.visible': visible});
## });
## ro.observe(el);
## }"
## )
##
## value_box("Series Data",
## value = formatC(mean(dat$measure), format = "d", big.mark = ","),
## showcase = sparkline,
## showcase_layout = showcase_left_center(),
## full_screen = TRUE,
## # height = "100px",
## # width = .2,
## # max_height = "100px",
## theme_color = "success"
## ) %>%
## return()
## })
##
## }
##
## options(shiny.host = '0.0.0.0')
## options(shiny.port = 8080)
##
## shinyApp(ui, server) |0 |4 |16 |NA |NA |
## | 76393244|22 |0 |r,xaringan |2023-06-02 20:16:33Z |mikeytop | 14219038|150 |I am using the metropolis theme in a xaringan presentation. I would like to be able to negate the theme for a couple slides, or alternatively, use a new theme for some other slides.
## Most importantly, I want to be able to get a "blank white" slide with no overhead padding/colors/anything.
## So far, I have tried fiddling around with some css from other questions here and here.
## (EDIT: added content below)
## As of now, I have tried to create a new slide-style by creating a new CSS file, saving it in the same folder as my rmarkdown file, and calling it in the YAML:
## ----
## title: "The Effect of X"
## subtitle: "Conference"
## author: ""
## institute: ""
## date: "6/9/2023"
## output:
## xaringan::moon_reader:
## css: [default, metropolis, metropolis-fonts, "fullfigure.css"]
## lib_dir: libs
## nature:
## ratio: "16:9"
## highlightStyle: github
## highlightLines: true
## countIncrementalSlides: false
## editor_options:
## chunk_output_type: inline
##
##
## The CSS file (fullfigure.css) has the following code in it:
## .remark-slide-content.full-slide-fig{
## padding: 0px 0px 0px 0px;
## width: 100%;}
##
## I have tried using this to create a "full picture" slide by creating a new slide with the class full-slide-fig:
## ---
## class: full-slide-fig
## background-image: url("libs/figures/shotspot_works.png")
## background-size: cover
## ---
##
##
## However, this still gives me a blue overhead that cuts off part of the image. |0 |0 |9 |mikeytop |2023-06-02 20:34:22Z |
## | 76393217|28 |0 |r,for-loop,plot,pie-chart |2023-06-02 20:09:23Z |rheabedi1 | 20743292|61 |I have 5 genes and I want to plot their snp distribution in R.
## So, basically 5 plots in R.
## I tried to use loop for this but in my case it generate one plot only for a gene:
## The code that I tried is:
## set.seed(5)
## library(dplyr)
## library(tidyverse)
## library(tibble)
## gene_exp <- read.table(file = "chr22_gene.txt", header = TRUE, sep = "\t" )
## g <- round(runif(5,1,dim(gene_exp)[1])) ##creates 5 random var to select 5 random gene from chr22gene file
## gene <- sapply(strsplit(rownames(gene_exp),"_"),`[`, 2)
## for(i in g){
## genename <- gene[g]
## gene1 <- gene_exp %>%
## rownames_to_column() %>%
## separate(rowname, into = c(NA, 'gene_name'), sep = '_') %>%
## filter(gene_name == genename)
##
## rsid_list <- NULL
## snp <- strsplit(rownames(gene_exp), "_")
## for(i in 1:dim(gene1)[1]){
## value <- snp[[i]][1]
## rsid_list <- rbind(rsid_list,value)
## }
##
## rownames(gene1) <- rsid_list
## gene1 <- gene1[,-c(1)]
##
## ##pie plot: ##for same and different tissues:
## pdf(paste0("rplot",sep="_",genename,".pdf"))
##
## snp_onetissue <- dim(one_tissueonly)[1]
## snp_difftissue <- dim(samesnp_difft)[1]
## x <- c(snp_onetissue,snp_difftissue)
## labels <- c("onesnp_onetissue","samesnp_differentissue")
## #pie(x,labels)
## pct <- round(x/sum(x)*100)
## lbls <- paste(labels, pct)
## lbls <- paste(lbls,"%",sep="")
##
## pie(x, labels=lbls,main = paste0("pie plot of snps for gene:",genename),col = rainbow(length(x)))
## legend(0.8, 0.9, c("onesnp_onetissue","samesnp_differenttissue"), cex = 0.8,
## fill = rainbow(length(x)))
## dev.off()
## }
##
## This code generate the plot but only for one gene.
## Is there anyway to modify it to plot the pie chart for all the gene in separate pdf.
## Thank you. |0 |0 |7 |pjs |2023-06-02 21:21:25Z |
## | 76393207|11 |0 |r,selenium-webdriver,ip,rselenium,connection-timeout |2023-06-02 20:07:47Z |pcoppi | 9820648|23 |This problem seems to be relatively common but none of the fixes I have seen work for me.
## I am using 64 bit Windows 10.
## I am trying to use RSelenium using docker. I have been trying to roughly follow the documentation here:
## https://cran.r-project.org/web/packages/RSelenium/vignettes/docker.html
## I have tried starting the docker container using both of the following:
## docker run -d -p 4445:4444 selenium/standalone-chrome
##
## docker run -d -p 4445:4444 selenium/standalone-chrome:latest
##
## I have been using inspect on the resulting container to get the docker host IP. I found that it was 172.17.0.2.
## In R I have tried running the following, using the host IP I found above.
## remDr <- RSelenium::remoteDriver(remoteServerAddr = "172.17.0.2", port = 4445L, browserName = "chrome")
## Sys.sleep(15)
## remDr$open()
##
## I get the following output:
## Error in checkError(res) :
## Undefined error in httr call. httr output: Timeout was reached: [172.17.0.2:4445] Connection timeout after 10012 ms
##
## I have tried using "localhost" as the value for remoteServerAddr in remoteDriver(). Doing so does not give me a timeout but the remote driver does not seem to actually connect to anything:
## > remDr <- RSelenium::remoteDriver(remoteServerAddr = "localhost", port = 4445L, browserName = "chrome")
## > Sys.sleep(15)
## > remDr$open()
## [1] "Connecting to remote server"
## $id
## [1] NA
##
## I read that using "localhost" does not work on Windows in general, so it might not be worth paying attention to this.
## I also read somewhere that 172.17.0.2 is an internal address that is not open to the actual Windows computer and that I should use 192.168.99.100 instead. Using this second address gives me the same error:
## Error in checkError(res) :
## Undefined error in httr call. httr output: Timeout was reached: [192.168.99.100:4445] Connection timeout after 10009 ms
##
## Most of the advice I have seen suggests fixing this error by correctly defining remoteServerAddr in remoteDriver() so I am not sure what else I can do. I would like to try increasing the timeout time but I have not figured out how to do so for remDr$open(). I am aware that you can change the timeout time for remote drivers, but this seems to only be for commands that are executed once a server has been connected to. |0 |0 |2 |NA |NA |
## | 76392914|34 |0 |r,replace,gsub |2023-06-02 19:09:46Z |Simon Harmel | 16762740|913 |I'm getting the responses to a survey (which is a data.frame) but the responses included several unwanted special (*, " etc.) and HTML (<a8> etc.) characters such as the one show below.
## I was wondering if there might be a way to remove these unwanted characters from Input to achieve my Desired_output?
## Input = "<a8>""""If you do not participate in any school or community activities or clubs, please describe why. (If you do not want to respond to this item, type """"""""skip"""""""".)""""<a8>"
##
## Desired_output = "If you do not participate in any school or community activities or clubs, please describe why. (If you do not want to respond to this item, type "skip".)" |0 |0 |9 |Wiktor Stribiżew |2023-06-02 19:25:27Z |
## | 76392902|13 |0 |r,shiny |2023-06-02 19:07:51Z |ifoxfoot | 19803766|119 |I have a shiny app where an event can and should trigger multiple shinyModals, but it only shows one modal. Here is my app code:
## library(shiny)
## library(rlang)
## library(dplyr)
## library(shinyjs)
##
##
## ui = fluidPage(
## sidebarLayout(
##
## sidebarPanel(
## selectInput("select_data_type",
## "select data type",
## choices = c("letters", "numbers")),
## uiOutput("select_data"),
## actionButton("add_data", "Add Data")
## ),
##
## mainPanel(
## tableOutput("data")
## )
##
## )
## )
##
## server = function(session, input, output) {
##
## #empty df
## empty_df <- data.frame()
##
## #reactive to store data
## data <- reactiveVal(empty_df)
##
## #reactive to store modal dialog btn choice
## confirm_change <- reactiveVal("empty")
##
## #add data to reactive
## observeEvent(input$add_data, {
## dat <- data.frame(a = input$select_data) %>%
## dplyr::rename(!!sym(input$select_data_type) := a)
## if (nrow(data()) < 1) {
## data(dat)
## } else {
## new_dat <- rbind(data(), dat)
## data(new_dat)
## }
## })
##
## #output data selector
## output$select_data <- renderUI({
## if (input$select_data_type == "letters") {
## data_choices <- c("A", "B", "C")
## } else {data_choices <- c(1:3)}
##
## selectizeInput("select_data", "select data", c("", data_choices),
## selected = NULL,
## multiple = FALSE)
## })
##
## #output data table
## output$data <- renderTable({
## data()
## })
##
## #create reactive to store choices
## previous_dtypes <- reactiveValues(prev = NULL)
##
## #store current and previous data type value in reactive element
## observeEvent(input$select_data_type, {
## previous_dtypes$prev <- c(tail(previous_dtypes$prev, 1), input$select_data_type)
## })
##
## #if user changes letter, show modal
## observeEvent(input$select_data_type, {
## req(nrow(data()) > 0)
##
## #here I am attempting to some flow control of sorts to make sure modal isn't triggered endlessly
## if(confirm_change() != "empty") {
## confirm_change("empty") }
## else {
## #modal code
## showModal(modalDialog(
## "Are you sure you want to change your data type?! All data will be deleted!",
## footer = tagList(actionButton("confirm", "Yes, I'm Sure!",
## class = "btn-danger"),
## actionButton("cancel", "Cancel"))
## ))
## }
## })
##
## #if numbers is selected, show another modal
## observeEvent(input$select_data_type, {
## req(nrow(data()) == 0)
## if (input$select_data_type == "numbers"){
## showModal(modalDialog("UHG, NUMBERS?!"))
## }
## })
##
## #if confirm, erase all data
## observeEvent(input$confirm, {
## confirm_change(TRUE)
## data(empty_df)
## shinyjs::reset("select_data")
## removeModal()
## confirm_change("empty")
## })
##
## #if cancel, go back to prevous data type value
## observeEvent(input$cancel, {
## confirm_change(FALSE)
## updateSelectInput(session, inputId = "select_data_type",
## selected = previous_dtypes$prev[1])
## removeModal()
## })
##
## }
##
## shinyApp(ui, server)
##
##
## to reproduce my problem open the app, select any letter from the select data dropdown list, then click add data. Then go back up to the select data type dropdown list and select numbers. A modal will pop up asking to confirm changing the data type. click "yes, I'm sure". It should work but no other modals pop up. What I want is for the the "uhg numbers" modal to pop up.
## The "uhg numbers" modal should pop up only if a. the user choses numbers when there is no other data entered, or b. the user changes to numbers and then clicks "yes, I'm sure" button in the confirm data type modal. |0 |0 |6 |NA |NA |
## | 76392893|38 |1 |r,openxlsx |2023-06-02 19:06:26Z |Laura | 21174713|31 |I'm using R to process some data and then OpenXLSX to output to an .xlsx file. I'd like negative numbers to be in accounting format, so in parenthesis (-100 is (100)), but without the $ or decimals.
## If I add the parenthesis during my data processing phase before the OpenXLSX section, that turns it into a character vector, which does not work, I need it to export as a number type. I've also tried
## num <- createStyle(numFmt = "ACCOUNTING")
## addStyle(wb, 1, style = num, rows = 4:53, cols = 3:6, gridExpand = T, stack = T)
##
## which does add the parenthesis but also adds a dollar sign and two decimals, which I do not want. |0 |0 |4 |r2evans |2023-06-02 19:29:56Z |
## | 76392884|33 |0 |r,range,nested-for-loop |2023-06-02 19:05:27Z |grace.cutler | 20999380|61 |I apologize in advance for this headache, and in particular, the minimum amount of data I need to give you to reproduce my situation.
## I have two data frames that look like:
## > dput(df_long[1:60,])
## structure(list(id = c("20230420-01", "20230420-01", "20230420-01",
## "20230420-01", "20230420-01", "20230420-01", "20230420-01", "20230420-01",
## "20230420-01", "20230420-01", "20230420-01", "20230420-01", "20230420-01",
## "20230420-01", "20230420-01", "20230420-01", "20230420-01", "20230420-01",
## "20230420-01", "20230420-01", "20230420-01", "20230420-01", "20230420-01",
## "20230420-01", "20230420-01", "20230420-01", "20230420-01", "20230420-01",
## "20230420-01", "20230420-01", "20230420-02", "20230420-02", "20230420-02",
## "20230420-02", "20230420-02", "20230420-02", "20230420-02", "20230420-02",
## "20230420-02", "20230420-02", "20230420-02", "20230420-02", "20230420-02",
## "20230420-02", "20230420-02", "20230420-02", "20230420-02", "20230420-02",
## "20230420-02", "20230420-02", "20230420-02", "20230420-02", "20230420-02",
## "20230420-02", "20230420-02", "20230420-02", "20230420-02", "20230420-02",
## "20230420-02", "20230420-02"), condition = c("control", "control",
## "control", "control", "control", "control", "control", "control",
## "control", "control", "control", "control", "control", "control",
## "control", "control", "control", "control", "control", "control",
## "control", "control", "control", "control", "control", "control",
## "control", "control", "control", "control", "control", "control",
## "control", "control", "control", "control", "control", "control",
## "control", "control", "control", "control", "control", "control",
## "control", "control", "control", "control", "control", "control",
## "control", "control", "control", "control", "control", "control",
## "control", "control", "control", "control"), pairscores = c(4.1,
## 4.1, 4.1, 4.1, 4.1, 4.1, 4.1, 4.1, 4.1, 4.1, 4.1, 4.1, 4.1, 4.1,
## 4.1, 4.1, 4.1, 4.1, 4.1, 4.1, 4.1, 4.1, 4.1, 4.1, 4.1, 4.1, 4.1,
## 4.1, 4.1, 4.1, 4.1, 4.1, 4.1, 4.1, 4.1, 4.1, 4.1, 4.1, 4.1, 4.1,
## 4.1, 4.1, 4.1, 4.1, 4.1, 4.1, 4.1, 4.1, 4.1, 4.1, 4.1, 4.1, 4.1,
## 4.1, 4.1, 4.1, 4.1, 4.1, 4.1, 4.1), round = c("1", "2", "3",
## "4", "5", "6", "7", "8", "9", "10", "11", "12", "13", "14", "15",
## "16", "17", "18", "19", "20", "21", "22", "23", "24", "25", "26",
## "27", "28", "29", "30", "1", "2", "3", "4", "5", "6", "7", "8",
## "9", "10", "11", "12", "13", "14", "15", "16", "17", "18", "19",
## "20", "21", "22", "23", "24", "25", "26", "27", "28", "29", "30"
## ), win = c(1, 0, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 1, 0, 1,
## 0, 1, 0, 1, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1,
## 0, 1, 1, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 1, 1, 1,
## 1), sound = c(1, 1, 2, 1, 2, 1, 1, 2, 2, 2, 1, 1, 2, 3, 5, 6,
## 7, 8, 8, 1, 4, 8, 6, 7, 8, 5, 4, 8, 8, 8, 1, 1, 2, 1, 2, 1, 1,
## 2, 2, 2, 1, 1, 2, 3, 5, 6, 7, 8, 8, 1, 4, 8, 6, 7, 8, 5, 4, 8,
## 8, 8), ttrs = c(34.8679761886597, 53.7323212623596, 72.872288942337,
## 95.00337266922, 108.467720985413, 127.519412279129, 140.932771205902,
## 152.619723320007, 164.774888753891, 189.440443992615, 199.848607540131,
## 211.736871480942, 228.6589448452, 240.730955839157, 258.806998729706,
## 272.826892614365, 285.802761793137, 301.402906894684, 314.37718629837,
## 330.579266309738, 342.812170028686, 353.819888830185, 370.405727863312,
## 379.25874209404, 386.349495410919, 398.328526258469, 408.155629873276,
## 420.099550485611, 430.813805341721, 441.291796922684, 34.8679761886597,
## 53.7323212623596, 72.872288942337, 95.00337266922, 108.467720985413,
## 127.519412279129, 140.932771205902, 152.619723320007, 164.774888753891,
## 189.440443992615, 199.848607540131, 211.736871480942, 228.6589448452,
## 240.730955839157, 258.806998729706, 272.826892614365, 285.802761793137,
## 301.402906894684, 314.37718629837, 330.579266309738, 342.812170028686,
## 353.819888830185, 370.405727863312, 379.25874209404, 386.349495410919,
## 398.328526258469, 408.155629873276, 420.099550485611, 430.813805341721,
## 441.291796922684), ttbp = c(42.8691244125366, 57.7340142726898,
## 79.8788452148437, 101.018859148026, 114.473237752914, 130.520850658417,
## 144.944416761398, 158.633060216904, 173.786515951157, 193.448391675949,
## 203.859516382217, 220.743315935135, 233.666549682617, 249.745782613754,
## 265.810203790665, 278.836834669113, 291.811623334885, 302.416656732559,
## 320.388904809952, 335.58996462822, 347.819845914841, 361.828748226166,
## 372.408302783966, 380.263983488083, 390.360696315765, 402.329761266708,
## 411.160075902939, 425.111953496933, 435.816273927689, 450.295118093491,
## 42.8691244125366, 57.7340142726898, 79.8788452148437, 101.018859148026,
## 114.473237752914, 130.520850658417, 144.944416761398, 158.633060216904,
## 173.786515951157, 193.448391675949, 203.859516382217, 220.743315935135,
## 233.666549682617, 249.745782613754, 265.810203790665, 278.836834669113,
## 291.811623334885, 302.416656732559, 320.388904809952, 335.58996462822,
## 347.819845914841, 361.828748226166, 372.408302783966, 380.263983488083,
## 390.360696315765, 402.329761266708, 411.160075902939, 425.111953496933,
## 435.816273927689, 450.295118093491), ttbi = c(50.7323212623596,
## 69.872288942337, 92.00337266922, 105.467720985413, 124.519412279129,
## 137.932771205902, 149.619723320007, 161.774888753891, 186.440443992615,
## 196.848607540131, 208.736871480942, 225.6589448452, 237.730955839157,
## 255.806998729706, 269.826892614365, 282.802761793137, 298.402906894684,
## 311.37718629837, 327.579266309738, 339.811122179031, 350.818850278854,
## 367.405727863312, 376.25874209404, 383.348484039307, 395.328526258469,
## 405.155629873276, 417.099550485611, 427.813805341721, 438.291796922684,
## 454.063094377518, 50.7323212623596, 69.872288942337, 92.00337266922,
## 105.467720985413, 124.519412279129, 137.932771205902, 149.619723320007,
## 161.774888753891, 186.440443992615, 196.848607540131, 208.736871480942,
## 225.6589448452, 237.730955839157, 255.806998729706, 269.826892614365,
## 282.802761793137, 298.402906894684, 311.37718629837, 327.579266309738,
## 339.811122179031, 350.818850278854, 367.405727863312, 376.25874209404,
## 383.348484039307, 395.328526258469, 405.155629873276, 417.099550485611,
## 427.813805341721, 438.291796922684, 454.063094377518)), row.names = c(NA,
## -60L), class = c("tbl_df", "tbl", "data.frame"))
##
## and the other one I must upload as a .txt file (https://drive.google.com/file/d/1oZ-0y5rAEsEuQ0gA_9bdWH6fr4wT7xkJ/view?usp=sharing)
## I would like to get a mean of each emotion in au_slim within au_slim$timestamp ranges equal to range df_long$ttbi - 1 to df_long$ttbi - 1 for each row in df_long. I need to keep track of these by the id in each df, and by the df_long$win associated with each df_long$ttbi. My expected output would be a new df that looks something like:
##
## id
## win
## ttbi
## happiness
## sadness
## surprise
## fear
## anger
## disgust
## 20230420-01
## 1
## 1
## #
## #
## #
## #
## #
## #
## 20230420-01
## 0
## 2
## #
## #
## #
## #
## #
## #
## 20230420-01
## 1
## 3
## #
## #
## #
## #
## #
## #
##
## This would, of course, continue for all ids, where each id would have 30 rows (for the 30 ttbi's). I have been poking at this for a few days with basically no success. Frankly, I am just way in over my head code-wise. I know I need to use a for loop to iterate through each id, and probably a nested for loop to run each ttbi in each id. Any help would be great.
## *Note that a lot of the cells in the new df will be 0 (or very close to it). That's fine. |0 |0 |4 |grace.cutler |2023-06-03 06:48:49Z |
## | 76392840|40 |1 |r,data.table |2023-06-02 18:58:05Z |baobab | 22007784|13 |I want to conditionally generate output in a column based on values in two other columns using data.table. This is something I want to achieve using a function containing 'if' statements specifically, as I'm just trying to learn how to use data.table. The if statement contains shift(), and I think that this may be the cause of my problems.
## What I have tried so far is the following:
## library(data.table)
##
## #Data
##
## DT <- data.table(V1 = sample(LETTERS[1:3], 20, replace = TRUE),
## V2 = sample(1:5, 20, replace = TRUE))
##
## #function
##
## fun1 <- function(x, y){
## if(x == "C" & shift(y, type = "lead") > y){
## return("Greater")
## } else if(x == "C" & shift(y, type = "lead") < y){
## return("Lesser")
## } else{
## return(NA)
## }
## }
##
## #function implementation
##
## DT.v1 <- DT[, V3 := mapply(fun1, x = V1, y = V2)]
##
## If I run the above, I get the error:
## Error in if (x == "C" & shift(y, type = "lead") > y) { :
## missing value where TRUE/FALSE needed
##
## I had an inkling that this error may be caused by comparison with NA in the last iteration as shift(y, type = "lead") would equal NA. Addition of the condition !is.na(shift(y, type = "lead")) in the 'if' statement did stop the error from appearing but leads to generation of only NULL values.
## I have been able to generate my desired output (see script below) but would also like to learn how i can achieve this using 'if' statements in a function.
## DT.v2 <- DT[V1 == "C" & shift(V2, type = "lead") > V2, V3 := "Greater"][
## V1 == "C" & shift(V2, type = "lead") < V2, V3 := "Lesser"]
##
## #or an alternative way to generate the desired output:
##
## DT.v3 <- DT[, V3 := ifelse(V1 == "C" & shift(V2, type = "lead") > V2, "Greater",
## ifelse(V1 == "C" & shift(V2, type = "lead") < V2, "Lesser", NA))]
##
## Can someone help me understand how to implement the function in a correct way? Thanks for taking the time to help! |0 |0 |2 |NA |2023-06-02 19:28:42Z, 2023-06-02 19:28:18Z |
## | 76392784|13 |0 |r,ggplot2 |2023-06-02 18:50:02Z |Diego | 14058289|31 |This question already has answers here:
##
##
##
##
##
##
## Use ggplot2 to plot multiple curves
##
## (1 answer)
##
##
##
## Add multiple curves / functions to one ggplot through looping
##
## (2 answers)
##
##
## Closed yesterday.
##
##
##
## I am creating a function that automatically plots curves based on user input. Using the curve() function in base R I managed to plot it like this:
##
## I would prefer, however, to use ggplot2 instead of base R, but it seems that I need to create a geom_function() line for every curve instead of it being determined automatically based on user input. Is there a solution using loops or maybe lapply? Here is a recreation of my data and the part of the script for plotting that I used with base R:
## df<-data.frame(
## PseudoA=c(0.3287622, 0.8539452, 0.3293448, 0.1859212, 0.7030731, 0.1382480, 0.4184386, 1.0786543, 1.3189188, 0.9217081, 2.6668777),
## PseudoB=c(1.0387213, -0.4028699, 0.3206788, 0.8403824, -0.3619186, 0.22062480, -2.95486273, -1.73832891, -1.54281422, -0.44192827, -0.78999262),
## items<-c('i1','i2','i3','i4','i5','i6','i7','i8','i9','i10','i11')
## )
##
## rownames(df)<-df$items
##
## p<-list()
## eq<- function(x){(1/(1+2.71828^(-1.7*(df$PseudoA[1]*(x-df$PseudoB[1])))))}
## p<-curve(eq, col="white", xlim=c(-4,4),ylim=c(0,1), xlab="Level of Trait", ylab="p(1.0)")# setting up an empty plot to add the curves
## colors<-rainbow(n = nrow(df))
##
## for(i in 1:11){
## eq<-function(x){(1/(1+2.71828^(-1.7*(df$PseudoA[i]*(x-df$PseudoB[i])))))}
## p[i]<-curve(eq, col=colors[i], xlim=c(-4,4), ylim=c(0,1), main="Item Characteristic Curve", lwd=3,add=TRUE)
## legend(x=-4, y=1, legend=rownames(df), fill=colors, pt.cex=3)
## p
## }
##
## Keep in mind that the user would specify the range in the loop (instead of being 1:11 it could be any other range, like 5:9). Any tips would be appreciated! |0 |0 |2 |NA |NA |
## | 76392725|23 |1 |r,ggplot2 |2023-06-02 18:38:25Z |caveman | 19361693|79 |I'm trying to set a dynamic yaxis label that adjusts to either millions or thousands. Because the data is constantly changing each time pulled in, I can't just hardcode it.
## I couldn't find anything on the subject so I was thinking something like an if_else statement would work, but I'm getting an error.
## Reprex is using ChickWeight dataset. Here I was trying to get it scale to million since it meets the minimum threshold, but if it didn't to switch to thousand. Thinking maybe I need to include min and max. Would an if_else even be the right approach? Thanks for any suggestions.
## df <- ChickWeight
##
## lumpy <- (ChickWeight$weight * 10000)
##
## df %>%
## ggplot(., aes(x = `Time`, y = lumpy)) +
## if_else(lumpy >= 1000000,
## scale_y_continuous(name = NULL,
## labels = ~scales::comma(., scale = 1/1000000, suffix = "M", accuracy = 1)),
## scale_y_continuous(name = NULL,
## labels = ~scales::comma(., scale = 1/100000, suffix = "K", accuracy = 1))) |0 |0 |6 |NA |NA |
## | 76392709|17 |0 |r,ggplot2,legend,facet-wrap,facet-grid |2023-06-02 18:34:36Z |Linton | 8671652|11 |I am using ggplot2 to create a heatmap, and am grouping my y variable by a categorical variable using facet_grid(). My y variables are genes of interest, and I am grouping them by their respective Pathways using facet_grid().
## The pathway names are so long that I gave most of them acronyms, but want to use a legend to detail exactly what each acronym stands for.
## How can I add a separate legend specifically for the variable I am calling in facet_grid()?
## So far, everything I have found online tells me how to customize legends pertaining to the x or y variable - but I can't seem to find anything on how to create a legend specifically for the facet_grid() or facet_wrap() variables.
## I already have a legend describing the color gradient in the heatmap, and do not want to lose this legend.
## I have also included a screenshot of the kind of legend I'd like to make.
## I have attached a screenshot of one of my heatmaps showing the facet_grid() labels - some of these labels are abbrevations that I want to explain in a legend. |0 |0 |2 |Linton |2023-06-02 20:41:42Z |
## | 76392631|53 |2 |r,dataframe,summarize |2023-06-02 18:22:09Z |stats_noob | 13203841|5,117 |I am working with the R programming language.
## I have the following dataset:
## library(dplyr)
##
## df = structure(list(ethnicity = c("c", "c", "c", "b", "c", "b", "b",
## "b", "c", "a", "b", "b", "a", "b", "c", "a", "c", "c", "a", "a",
## "a", "a", "c", "b", "c", "b", "a", "b", "c", "b", "a", "c", "c",
## "a", "c", "b", "a", "c", "a", "a", "b", "c", "c", "a", "c", "a",
## "c", "b", "a", "b", "a", "a", "c", "a", "b", "a", "a", "c", "a",
## "b", "a", "c", "a", "c", "b", "c", "b", "b", "c", "b", "b", "c",
## "c", "a", "b", "b", "a", "b", "a", "a", "b", "c", "c", "a", "b",
## "a", "b", "a", "c", "c", "b", "c", "a", "b", "b", "c", "b", "a",
## "c", "c"), number_of_degrees = c(3L, 2L, 2L, 3L, 1L, 1L, 3L,
## 2L, 2L, 2L, 2L, 2L, 2L, 3L, 2L, 1L, 2L, 2L, 2L, 3L, 2L, 3L, 2L,
## 3L, 1L, 3L, 3L, 3L, 1L, 3L, 3L, 2L, 2L, 2L, 3L, 3L, 3L, 2L, 1L,
## 2L, 1L, 3L, 3L, 2L, 1L, 3L, 1L, 3L, 2L, 2L, 1L, 3L, 2L, 1L, 3L,
## 3L, 3L, 1L, 2L, 2L, 1L, 2L, 3L, 3L, 1L, 2L, 1L, 2L, 3L, 3L, 1L,
## 3L, 2L, 1L, 1L, 2L, 3L, 1L, 2L, 2L, 1L, 2L, 1L, 1L, 2L, 1L, 3L,
## 1L, 1L, 2L, 1L, 2L, 1L, 1L, 1L, 3L, 3L, 2L, 1L, 2L)), class = "data.frame", row.names = c(NA,
## -100L))
##
##
## df %>%
## # Group the data by number_of_degrees
## group_by(number_of_degrees) %>%
## # Calculate the percentage of each ethnicity within each group
## summarize(
## percent_a = mean(ethnicity == "a") * 100,
## percent_b = mean(ethnicity == "b") * 100,
## percent_c = mean(ethnicity == "c") * 100
## )
##
## This produces the following output:
## # A tibble: 3 x 4
## number_of_degrees percent_a percent_b percent_c
## <int> <dbl> <dbl> <dbl>
## 1 1 33.3 36.7 30
## 2 2 31.6 21.1 47.4
## 3 3 34.4 40.6 25
##
## My Question: Is there a more "compact" way to write this code such that I don't have to manually write "percent_a","percent_b", etc.? This way, it would be much faster and automatically do it for all values of ethnicity. |3 |21 |72 |ThomasIsCoding |2023-06-02 18:50:17Z, 2023-06-02 18:49:34Z, 2023-06-02 20:03:15Z |
## | 76392610|27 |0 |r,rename,mutate |2023-06-02 18:18:35Z |Sara Haas | 22009067|11 |I am trying to rename observations in R so that I can filter and I keep running into errors. For example, if the data set was
## farmer fruit number
## 1 Sally strawberries 32
## 2 Joe strawberry 12
## 3 Bob pear 42
##
## but I wanted to calculate how many farmers are growing strawberries.
## How would I get it to be
## farmer fruit number
## 1 Sally **strawberry** 32
## 2 Joe strawberry 12
## 3 Bob pear 42
##
## I tried
## df1 <- df %>%
## mutate(fruit = str_replace(fruit, "strawberries", "strawberry"))
##
## and nothing changed.
## I also tried
## df1 <- df %>%
## + mutate(fruit = as.character(fruit)) %>%
## + mutate(fruit = replace(fruit, fruit = "strawberries", "strawberry"))
##
## and got an error. |0 |0 |0 |joran |2023-06-02 18:19:59Z, 2023-06-02 18:29:28Z |
## | 76392410|29 |1 |r,plot |2023-06-02 17:46:11Z |Adeeb Graissy | 14773496|11 |I am making a diagram with R and i want to make a diagram showing movie budgets for different genres.
## My problem is that most movies have more than one genre, for example (Drama, Comedy) or (Drama,Adventure, Sci-Fi):
##
## How can i make movies have only one genre?, like (Drama), (Adventure), (Horror), (Western),...
## for example movie with genre (Drama, Comedy) and movie with genre (Drama,Adventure, Sci-Fi) will be added to genre (Drama) |0 |0 |2 |Konrad Rudolph |2023-06-02 17:51:17Z |
## | 76392409|39 |0 |r,ggplot2,histogram |2023-06-02 17:45:54Z |Billy | 19763658|27 |I have a dataset which has multiple columns and one of them is a categorical variable. The categorical variable takes 3 values: 1, 2, 3. I want to create a plot in ggplot which will have the 3 histograms one below the other and then I want to add below the third histogram, the histogram in which we don't take into consideration the categorical variable. So far I have written the below code
## #The name of the dataset is dataset.clust
##
## dataset.clust$cluster = factor(dataset.clust$cluster)
##
## library(patchwork)
## library(ggplot2)
##
## dataset.clust$cluster = factor(dataset.clust$cluster)
##
## hist_plot1 = ggplot(dataset.clust, aes(x = population, fill = cluster)) +
## geom_histogram(position = "identity", alpha = 0.7) +
## facet_wrap(~ cluster, nrow = 3) +
## scale_fill_manual(values = c("red", "green", "blue")) +
## guides(fill = guide_legend(title = "Cluster")) +
## ggtitle("Grouped Histograms for population")
##
## # Second Histogram
## hist_plot2 = ggplot(dataset.clust, aes(x = population)) +
## geom_histogram(fill = "yellow", color = "black")
##
## # Combine histograms and arrange vertically
## combined_plot = hist_plot1 / hist_plot2
##
## print(combined_plot)
##
## The above code gives me this
## Does anyone know how to combine these plots in one so that the four plots have the same x and y axis? |0 |0 |6 |Billy |2023-06-03 14:43:04Z, 2023-06-03 15:45:07Z |
## | 76392386|10 |0 |r,mathematical-optimization,portfolio,r-portfolioanalytics |2023-06-02 17:40:40Z |Manolo Medina | 19671360|13 |How to add a VaR constraint to the portfolio optimization function?
## I'm trying to optimize a portfolio using PortfolioAnalytics maximizing the Sharpe Ratio, given the following constraints:
## The sum of the weights must equal 1.
## The maximum weight per asset must not exceed 0.1.
## The maximum VaR of the porfolio must not exceed 0.025.
## I have tried adding the VaR constraint...
## # As type = "risk".
## portf <- PortfolioAnalytics::add.constraint(portf, type = "risk",
## return_target = -0.025,
## sense = "<=",
## name = "VaR")
## ######
## portf <- PortfolioAnalytics::add.constraint(portf, type = "risk", name = "VaR",
## max = 0.025)
##
## # As type = "return".
##
## portf <- PortfolioAnalytics::add.constraint(portf, type = "return",
## return_type = "VaR",
## return_threshold = -0.025,
## sens = "<=")
## ####
## portf <- PortfolioAnalytics::add.constraint(portf, type = "return", name = "VaR",
## max = 0.025)
##
## # As add.objective(portf, type = "risk")
##
## port <- PortfolioAnalytics::add.objective(port, type = "risk",
## name = "VaR", max = 0.025)
##
##
## Any help would be greatly appreciated.
## Thanks in advance! |0 |0 |3 |NA |NA |
## | 76392367|25 |0 |r,googlesheets4 |2023-06-02 17:37:06Z |Zack | 11672339|98 |When using the package googlesheets4 is there any method for writing data to a sheet skipping the first row so that the data being written to a sheet starts at row 2? I am hoping to leverage something similar to when you read a sheet and utilize ex. skip = 2 to read data starting at the 3rd row
## I have tried the following which does not work
## write_sheet(data = df, ss = "google_sheet_url", skip = 1, sheet = "test") |0 |0 |6 |Konrad Rudolph |2023-06-02 17:52:31Z, 2023-06-02 19:03:55Z |
## | 76392347|15 |0 |r,mixed-models |2023-06-02 17:33:27Z |Phoenix Byrne | 19648368|1 |I want to run a mixed ANCOVA with the Block as a within-subject factor and Condition as a between-subject factor. My DV is ResponseBias and my covariates are Age and mA. Here is an example of what my dataframe looks like:
## Subject Condition Block Age mA ResponseBias
## 705 c 1 22 2.1 0.133297538
## 705 2 0.109737012
## 705 3 -0.064639146
## 706 u 1 21 0.9 -0.012777052
## 706 2 -0.118587142
## 706 3 -0.423479985
##
## My question is, how do I deal with the fact that some variables are not the same length? Surely duplicating the Condition, Age and mA columns per subject ID to make them the same length would influence the ANCOVA?
## I hope this makes sense, any clarification would be greatly appreciated! |0 |0 |0 |Phil |2023-06-02 18:18:43Z |
## | 76392326|15 |0 |r,r-markdown,knitr |2023-06-02 17:28:41Z |qdread | 2854608|3,309 |I would like to use results = 'asis' in a RMarkdown chunk option, to programmatically print section headings in a loop, as described in the cookbook.
## I would also like to have multiple figures side-by-side using fig.show = 'hold', out.width = '50%' as also described in the cookbook.
## However when I try to combine both those options into one chunk, it doesn't work because it causes all the headings to print first, then all the plots at the end.
## Is there any way to programmatically create headings in a loop and show side-by-side figures at the same time?
## Example of what I have tried
## ---
## title: "fig.show asis reprex"
## output: html_document
## ---
##
## ```{r, results = 'asis', fig.show = 'hold', out.width = '50%', echo = FALSE, warning = FALSE, message = FALSE}
## library(ggplot2)
##
## for (i in 1:3) {
## cat(paste('\n\n# Column', i, 'of the wonderful mtcars dataset\n\n'))
## print(ggplot(mtcars, aes_string(x = names(mtcars)[1])) + geom_boxplot())
## print(ggplot(mtcars, aes_string(x = names(mtcars)[1])) + geom_histogram())
##
## }
## ```
##
## Sadly, I get this output, instead of having each header appear directly above the appropriate figures: |0 |19 |34 |NA |NA |
## | 76392259|33 |0 |r,plot,time-series,scatter-plot |2023-06-02 17:18:21Z |Damiano | 20177423|1 |I am trying to create a series of scatterplots (x=PC1, y=PC2) for a bunch of data I have collected. I am having issues plotting the data through time, specifically, automating the creation of plots relative to specific time bins. I have hundreds of entries and I already wasted too much time trying to divide the dataset manually.
## The data has been collected similarly to this example:
##
## SPECIES
## GROUP
## APPEARANCE
## DISAPPEARANCE
## PC1
## PC2
## Alpha
## A
## 1
## 2
## -0.100119669
## 0.0595756
## Beta
## B
## 1
## 2
## 0.217854357
## -0.010260947
## Gamma
## B
## 1
## 5
## 0.193998962
## -0.029757206
## Delta
## B
## 2
## 3
## 0.126089613
## 0.014409345
## Epsilon
## D
## 3
## 4
## -0.094182811
## -0.004406088
##
## Here we assume that each time bin lasts 1 unit (i.e. a species appears at time 1 and disappears at time 2). As you see, some species last longer than others, thus belonging to multiple time bins.
## I have tried all that my beginner knowledge allowed me to conjure and searched for an answer on the internet, but I am still very much a pen-and-paper scientist. I really need some help with this.
## I have tried all that my beginner knowledge allowed me to conjure and searched for an answer on the internet. |0 |0 |0 |Phil |2023-06-02 18:17:50Z |
## | 76392228|19 |0 |r,regression,lme4 |2023-06-02 17:14:14Z |Svenja | 22009129|11 |I have a dataset on students in classrooms. After multiple multilevel imputation with mice (pooling with mitml), I have 20 datasets. I now want to apply multilevel regression. The "normal" regressions work fine, but as soon as I include interaction terms I don't understand the output anymore. Example:
## I want to calculate the effect of the interaction between mean achievement (meanmath, L2) and classroom climate (cc, L2) on individual achievement (math, L1). The equation looks like this:
## Int1 <- with(data, lmer(math ~ meanmath*cc + (1|classID)))
##
## In the output I now get the following estimates:
## (Intercept) 0.34
## meanmath 0.22
## cc1 -0.43
## cc2 -0.69
## cc3 -0.66
## meanmath*cc1 -0.16
## meanmath*cc2 0.12
## meanmath*cc3 0.23
##
## These cc1-3 variables do not exist in my dataset, neither in the original one, nor in the imputed ones. Could maybe someone tell me how I could find where these variables come from?
## I tried to run the equation with just one of the imputed datasets -> Same thing happened
## I made sure that there are the same variables in all imputed datasets -> This is the case
## Details from comment:
## data$cc is a Factor w/ 4 Levels: "1", "2", "3", "4". Otherwise the variables are all continuous. |0 |0 |0 |user20650 |2023-06-02 17:24:44Z |
## | 76392177|34 |0 |r |2023-06-02 17:05:33Z |Sou | 21976989|11 |How do you number values by group in R?
## for example i want the following table:
## 100 1
## 100 2
## 100 3
## 200 1
## 200 2
## 200 3
## 300 1
## 300 2
## 300 3
##
## I want to write a function that will create a unique value for each member of particular group. Group here being '100', '200', '300'. I want to generate unique value for each member of the group and then when the group changes, I want R to start numbering each of the values in that group from 1 again. How do I accomplish this? Is it possible to do this with base R? |0 |0 |0 |margusl |2023-06-02 17:10:46Z |
## | 76392149|26 |0 |r,linear-programming,lpsolve |2023-06-02 17:00:22Z |Lcsballer1 | 15343136|97 |Here is the data I'm working with:
## structure(list(Name = c("Jokic", "Butler", "Murray", "Adebayo",
## "Porter", "Gordon", "Martin", "Pope", "Vincent", "Lowry", "Brown",
## "Strus", "Robinson", "Green", "Highsmith"), Points = c(62.8,
## 48.8, 45.8, 41.8, 35.3, 30.3, 29.3, 23.8, 23.3, 22.3, 21.8, 19,
## 16.3, 8.5, 6.8), Cost = c(14000, 13400, 10800, 9200, 7600, 6600,
## 7400, 5600, 5800, 5200, 6200, 4800, 4200, 2200, 1400)), class = c("tbl_df",
## "tbl", "data.frame"), row.names = c(NA, -15L))
##
## Here's my current code:
## library(readxl)
## library(tidyverse)
## library(lpSolveAPI)
##
## # Read the data from Excel
## data <- read_excel("C:/Users/M0185JN/Downloads/NBA_1.xlsx")
## data$cpt_points <- 1.5*data$Points
## data$cpt_cost <- 1.5*data$Cost
##
## #Players
## num_players <- rep(1,nrow(data))
## num_captain <- rep(1,nrow(data))
##
## # Define the objective function coefficients
## obj <- data$Points*num_players + data$cpt_points*num_captain
##
## # Create a new LP model
## lprec <- make.lp(nrow(data), nrow(data))
##
## # Set the optimization direction to maximize
## lp.control(lprec, sense = "maximize")
##
## # Set the objective function coefficients
## set.objfn(lprec, obj)
##
## # Set type of decision variables
## set.type(lprec, 1:nrow(data), type = "binary")
##
## # Constraint: Pick exactly 5 players
## add.constraint(lprec, num_players, "=", 5)
## add.constraint(lprec, num_captain, "=", 1)
##
## # Constraint: Total cost must be less than or equal to 50,000
## add.constraint(lprec, data$Cost*num_players + data$cpt_cost*num_captain, "<=", 50000)
##
## # Constraint: No Duplicate Players
## add.constraint(lprec, num_players + num_captain, "<=", 1)
##
## # Solve the linear programming problem
## solve(lprec)
##
## # Get the solution status
## status <- get.solutioncount(lprec)
##
## # Check if a solution was found
## if (status > 0) {
## # Retrieve the values of the decision variables
## player_picked <- get.variables(lprec)
##
## # Create a data frame with the players and their corresponding picked status
## result <- data.frame(Name = data$Name, Picked = player_picked)
##
## # Filter the data frame to show only the players that were picked (Picked = 1)
## picked_players <- result[result$Picked == 1, ]
##
## # Print the picked players
## print(picked_players)
## } else {
## print("No feasible solution found.")
## }
##
## The code is continuing to give me a "No Feasible Solution Found." when I know for a fact there is. Here are the constraints better explained:
## I have to pick 6 players
## 1 is going to get cpt_points and cpt_cost while the others are getting Points and Cost.
## You can also not pick the same player to get both.
## Total cost cannot exceed 50000. |0 |0 |8 |Dave2e |2023-06-02 20:57:51Z, 2023-06-02 22:31:42Z |
## | 76392055|46 |0 |r,regex,string |2023-06-02 16:44:33Z |Adrian | 3391549|9,275 |library(stringr)
## string <- string <- c("pat1 hello333\n pat2 ok i mean pat1 again pat2 some more text pat1")
##
## I want to match all strings that start with pat1 and end with pat2.
## > str_extract_all(
## string,
## regex(
## "pat1.+pat2",
## dotall=TRUE
## )
## )
## [[1]]
## [1] "pat1 hello333\n pat2 ok i mean pat1 again pat2"
##
## This gives me 1 string that starts with pat1 and ends with pat2. However, my desired output is something like:
## > output
## [1] "pat1 hello333\n pat2"
## [2] "pat1 again pat2" |24 |73 |131 |NA |NA |
## | 76391960|28 |0 |r |2023-06-02 16:29:38Z |dicegods | 13332692|25 |I have a function here, where my intent is to add a record to the table. The column name is dynamically defined based on the firstCharVar variable.
## The dataframe tblname is a blank table. The first character field in that table is called myvar. There are other columns in that table, and they should remain blank.
## #update tables if no records
## NoData = function(tblname) {
## if (nrow(tblname) == 0) {
##
## #get column name of first character field
## allColumns = data.frame(
## colName = colnames(tblname),
## colIndex = 1:ncol(tblname),
## colClass = sapply(tblname, class)
## )
## charVars = allColumns[allColumns$colClass == 'character', ]
## firstCharVar = unfactor(charVars$colName[1])
##
## #run insert statement
## #this doesn't work
## #Error: unexpected '=' in "tblname = tblname %>% add_row(!!firstCharVar ="
## #tblname = add_row(tblname, !!firstCharVar = 'No Data Found')
##
## #but this does
## tblname = add_row(tblname, myvar = 'No Data Found')
##
##
## #clean up stuff used in function
## #rm(allColumns, charVars, firstCharVar)
## }}
##
## temp2 = NoData(temp2) |0 |0 |6 |NA |NA |
## | 76391906|32 |1 |r,ggplot2,custom-function |2023-06-02 16:21:37Z |seansteele | 12032850|619 |TLDR -Why doesn't this custom function work?
## library(ggplot2)
## dat <- data.frame(date = c("2023-06-01", "2023-06-02", "2023-06-03"),
## shots = c(8, 12, 10),
## passes = c(340, 410, 390))
##
## cust_ggplot <- function(df, date, metric){
##
## min_date <- min({{ date }}) # invalid 'type' (closure argument)
## # min_date <- min(!! ensym(date)) # Error in !ensym(x): invalid argument type.
## # min_date <- min(df[[date]]) # works if date is enclosed in quotes like this dat |> cust_title("date", "shots")
## # But I want it to look like tidyverse function with no quotes needed.
##
## }
##
## dat |>
## cust_ggplot(date, shots)
##
## I would like to emulate the tidyverse style and not have to use dat |> cust_title("date", "shots")
## Full walkthru:
## I am trying to turn this simple ggplot code into a function.
## ### Data
## dat <- data.frame(date = c("2023-06-01", "2023-06-02", "2023-06-03"),
## shots = c(8, 12, 10),
## passes = c(340, 410, 390)
## )
##
## ### Working Code
## dat |>
## ggplot(aes(x = date, y = shots)) +
## geom_col() +
## ggtitle(
## paste(
## "Shot Totals from",
## min(dat$date),
## "-",
## max(dat$date)
## )
## )
##
## I can get the plot to work just fine via
## ### Turn it into a function
##
## # This portion works
## cust_ggplot <- function(df, date, metric){
##
## df |>
## ggplot(aes(x = {{ date }}, y = {{ metric }} )) +
## geom_col()
##
## }
##
## dat |>
## cust_ggplot(date, shots) +
## ggtitle("Shot Totals")
##
## But when I try to find the min or max dates to pass to the subtitle, none of my attempts work.
## cust_ggplot <- function(df, date, metric){
##
## # min_date <- min({{ date }}) invalid 'type' (closure argument)
## # min_date <- min(!! ensym(date)) #Error in !ensym(x): invalid argument type.
## # min_date <- min(df[[date]]) # works if date is enclosed in quotes like this dat |> cust_title("date", "shots")
## # But I want it to look like tidyverse function with no quotes needed.
##
## df |>
## ggplot(aes(x = {{ date }}, y = {{ metric }} )) +
## geom_col()
##
## } |0 |3 |10 |NA |2023-06-02 16:49:39Z |
## | 76391856|13 |0 |r,tidyverse |2023-06-02 16:15:46Z |Gato | 14336726|365 |I cannot understand what's going on. I think this is related to the updating of R that I did before this problem occured. After running the code below I get an error message "Error in enc2utf8(path) : argument is not a character vector"
## My data comes from an ordinary csv file.
## Mydata <- read_csv(Mydata, col_names = c(
## "var1", "var2", "var3", "var4",
## "var5", "var6", "var7")) %>%
## mutate(var7 = factor(var7)) |0 |2 |10 |NA |NA |
## | 76391852|31 |0 |r,ggplot2,histogram |2023-06-02 16:15:02Z |roma | 21763379|25 |(if you are only interested in the problem, then go to "What if in short?")
## What kind of stupid question?
## I'm doing work and before that I built all graphics with x and I don't want to change the style.
## And now I need a histogram, but it does not suit me with ggplot2.
## What do I mean?
## I took the width of the column from hist(), so there will be the same number of them
## (which can be seen from the graphs),
## but in hist() and as I want,
## the bars do NOT cross the important/magic number 0.0012,
## and in `geom_histogramm' intersects.
## And if it's short?
## How to "shift" histogram bars with ggplot2 so that they do not cross a certain number (0.0012)?
## Or, how to make a histogram shorter with "data" from hist() and design with ggplot2?
## Here is my code:
## # check bin width
## standart_hist <- hist(my_vector, plot = F)
## bw <- standart_hist$breaks[2] - standart_hist$breaks[1]
##
## # create hist with ggplot and bw from standart hist
## gghist <- ggplot(mapping = aes(my_vector)) +
## geom_histogram(
## binwidth = bw,
## color = "black",
## fill = "white"
## )
##
## and result:
## my hist
##
## standard hist
##
## FIX:
## from joran --- instead of geom_histogram() use stat_bin() as here:
## stat_bin(geom = 'bar',breaks = <breaks vector from hist() output>)
##
## My data:
## my_vector <- (0.001201367, 0.001199250, 0.001198337, 0.001199200, 0.001199353, 0.001198439, 0.001202447, 0.001205639, 0.001207056, 0.001209714, 0.001204478, 0.001200064, 0.001199386, 0.001199976, 0.001200569, 0.001204738, 0.001208508, 0.001201491, 0.001200995, 0.001199861, 0.001200242, 0.001196367, 0.001200365, 0.001201807, 0.001194364, 0.001197196, 0.001192705, 0.001196178, 0.001192991, 0.001189777, 0.001194227, 0.001197158, 0.001204336, 0.001201081, 0.001201100, 0.001204755, 0.001198810, 0.001202090, 0.001194370, 0.001188529, 0.001191450, 0.001193616, 0.001195733, 0.001198886, 0.001201353, 0.001206878, 0.001201262, 0.001194806, 0.001196192, 0.001193215, 0.001195030, 0.001198202, 0.001184351, 0.001191890, 0.001192882, 0.001194621, 0.001203256, 0.001204150, 0.001197425, 0.001198002, 0.001196185, 0.001194915, 0.001198281, 0.001201858, 0.001195349, 0.001196401, 0.001205476, 0.001201740, 0.001197276, 0.001189442, 0.001192760, 0.001196846, 0.001201342, 0.001204854, 0.001202979, 0.001203136, 0.001199926, 0.001197398, 0.001199905, 0.001199252, 0.001198486, 0.001197114, 0.001196829, 0.001200228, 0.001199666, 0.001194918, 0.001204005, 0.001201363, 0.001204183, 0.001205889, 0.001204553, 0.001202369, 0.001203922, 0.001197001, 0.001200020, 0.001202672, 0.001201746, 0.001203532, 0.001198699, 0.001200975, 0.001202635, 0.001203121, 0.001190614, 0.001199029, 0.001200372, 0.001193731, 0.001193428, 0.001200259, 0.001195203, 0.001194854, 0.001193173, 0.001198266, 0.001195362, 0.001195252, 0.001201008, 0.001199291, 0.001196653, 0.001200357, 0.001201623, 0.001207463, 0.001199381, 0.001198047, 0.001196305, 0.001200419, 0.001208689, 0.001197434, 0.001193885, 0.001198708, 0.001204741, 0.001204281, 0.001193663, 0.001200234, 0.001203809, 0.001199003, 0.001195127, 0.001192189, 0.001187610, 0.001191390, 0.001200602, 0.001197817, 0.001202045, 0.001203998, 0.001205508, 0.001201051, 0.001202057, 0.001208911, 0.001203928, 0.001202267, 0.001201434, 0.001202647, 0.001210024, 0.001210509, 
## 0.001207881, 0.001206928, 0.001206128, 0.001203866, 0.001202204, 0.001204511, 0.001202310, 0.001197504, 0.001199019, 0.001200713, 0.001204197, 0.001204649, 0.001207965, 0.001201847, 0.001200585, 0.001203446, 0.001195972, 0.001202405, 0.001197182, 0.001191603, 0.001197663, 0.001202259, 0.001201008, 0.001200354, 0.001198090, 0.001193479, 0.001202457, 0.001201156, 0.001196038, 0.001201092, 0.001205488, 0.001212173, 0.001203497, 0.001208846, 0.001198349, 0.001200047, 0.001200799, 0.001206939, 0.001207142, 0.001201970, 0.001202742, 0.001204795, 0.001198463, 0.001201559, 0.001201344, 0.001206085, 0.001205526, 0.001197508) |0 |0 |8 |roma |2023-06-02 16:54:20Z |
## | 76391835|14 |0 |r,ggplot2 |2023-06-02 16:12:52Z |Bruno Avila | 18543318|43 |I'm on a project to evaluate the competitive risk between my variable of interest and the competitive event. I created the following code that returned a graph.
## cuminc(Surv(TIME,STATUS,type="mstate") ~ SEX, BASE) %>%
## ggcuminc(outcome = c(‘1’,‘2’)) + xlim(0,90) + ylim(0,0.2) + add_risktable() + scale_x_continuous(breaks = seq(0, 90, by = 10), limits = c(0, 90)) +theme_classic() + theme(legend.position = 'bottom', legend.direction = ‘horizontal’)
##
##
## I'd like to insert some information and formatting, but I'm not having success.
## 1 - Insert the p-value (0.12 and 0.25) and the name of the variables (death and non-death) at the end of the upper and lower lines;
## 2 - Change the legend, because 1 and 2 are coming. I would like to change it to death and non-death. I tried using ‘rename’ inside ‘outcome’ and it didn't work. |0 |0 |3 |NA |NA |
## | 76391830|10 |0 |r,linear-regression,glm,interaction |2023-06-02 16:12:36Z |Ajna F Kertesz | 22008813|1 |I'm running a linear regression model with a 3-way interaction, and the model runs, but it does not give me the actual interactions, how can I check if there's any interaction between the 3 terms (smart, behave and grades)?
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: binomial ( logit )
## Formula: Conformity_pref ~ Smart * Behave * Grades + (1 | PID)
## Data: MV_final
##
## AIC BIC logLik deviance df.resid
## 8310.7 8340.2 -4151.4 8302.7 11585
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.62796 -0.11064 0.07686 0.11559 2.22275
##
## Random effects:
## Groups Name Variance Std.Dev.
## PID (Intercept) 20.66 4.545
## Number of obs: 11589, groups: PID, 1295
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 3.83492 0.30598 12.53 <2e-16 ***
## Smart -0.81602 0.07850 -10.39 <2e-16 ***
## Behave 1.15429 0.08672 13.31 <2e-16 ***
## ---
## Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
##
## Correlation of Fixed Effects:
## (Intr) Smart
## Smart -0.228
## Behave -0.005 0.415
## fit warnings:
## fixed-effect model matrix is rank deficient so dropping 5 columns / coefficients
##
## I expect prediction terms such as Smart x Behave, Behave x Grades, Grades x Smart. |0 |0 |0 |Phil |2023-06-02 16:33:46Z |
## | 76391751|19 |0 |r,if-statement,match |2023-06-02 16:01:00Z |hcg | 7395807|316 |I have two datasets using r:
## df_100= data.frame(siteid=c(seq(1,5,1),conflu=c(3,2,4,5,6),diflu=c(9,2,30,2,5))
## df_full= data.frame(siteid=c(seq(1,10,2),conflu=c(6,3,5,2,3),diflu=c(5,9,2,30,7))
##
## If the siteid is the same between df_100 and df_full, I want to take the difference between the conflu columns of each data frame and the same with the diflu columns. I also want that output to be put into a new dataframe, where the siteid is retained and the difference between the columns creates a new column. For example:
## df_difference=data.frame(siteid=c(1,3,5), diff_con=c(3,1,-3), diff_dif=c(-4,-18,2)) |0 |2 |14 |NA |NA |
## | 76391676|21 |0 |r,ggplot2,icons,gt,mutate |2023-06-02 15:50:44Z |Essi | 8962951|767 |I want to create a table with different symbols depending on a string. I have a big df with multiple character values in one cell. I created this simple example:
## df = as.data.frame(c("AP1, BP3, CP2, AN2", "AN3", "BN1, CP3, AP3", "AP2, BN3, CN1"))
## colnames(df) = "V1"
##
## df$V2 = c("CP1", "BN2, CP1", "AP2, AN1", "CP3")
## df$V3 = c("BP2", "AN2, CP2", "AP3, BN1", "CN1")
##
## V1 V2 V3
## 1 AP1, BP3, CP2, AN2 CP1 BP2
## 2 AN3 BN2, CP1 AN2, CP2
## 3 BN1, CP3, AP3 AP2, AN1 AP3, BN1
## 4 AP2, BN3, CN1 CP3 CN1
##
## I want to mutate the character values into different colored symbols with the letters A, B or C in it. For example "AP1", should be a light red square with an "A" in it. The first letter is the letter that should be in the symbol, the 2nd letter is either "P" or "N". P should be red, N should be blue. And the number in the string is the intensity of the color, so 1 is only light, 3 is strong.
## I know gt-table and also how to mutate CP1 into a symbol like that. But the problem now is, that I have multiple entries within 1 cell.
## I would like to devide each column into a P and N column and then list the individual strings in rows by 2, so that in the end I have something like that:
##
## Any idea how I can easily transfer this df into a table like that? |3 |12 |20 |NA |NA |
## | 76391552|44 |0 |r,regex,string |2023-06-02 15:30:40Z |Adrian | 3391549|9,275 |This question already has answers here:
##
##
##
##
##
##
## How do I match any character across multiple lines in a regular expression?
##
## (26 answers)
##
##
## Closed yesterday.
##
##
##
## mystring <- "some irrelevant information999 \n ELSEIF\n (\n number IN (11112234) \n AND value = 1 \n AND reference >= 230.0\n )) criteria ) followed by more irrelevant info \n \n"
##
## From the string above, I would like to extract the substring that starts with ELSEIF\n (\n and ends with )) criteria ).
## Interestingly, I can match the starting pattern and ending pattern separately:
## library(stringr)
## > str_extract_all(mystring, "ELSEIF\n[[:space:]]+\\(\n")
## [[1]]
## [1] "ELSEIF\n (\n"
##
## > str_extract_all(mystring, "\\)\\) criteria \\)")
## [[1]]
## [1] ")) criteria )"
##
## but just not together:
## > str_extract_all(mystring, "ELSEIF\n[[:space:]]+\\(\n.+\\)\\) criteria \\)")
## [[1]]
## character(0)
##
## The desired output is this:
## > output
## [1] "ELSEIF\n (\n number IN (11112234) \n AND value = 1 \n AND reference >= 230.0\n )) criteria )" |24 |73 |131 |Adrian |2023-06-02 16:38:34Z, 2023-06-02 17:08:05Z, 2023-06-02 16:16:01Z |
## | 76391482|22 |0 |r,ggplot2 |2023-06-02 15:20:50Z |MOHAMMED | 14380761|396 |I want to plot some functions with their gradients using the ggplot2 package in r.
## p = 3
## n0 = 100
## z0 = seq(0.01, 0.99, length = n0)
## AB0 = matrix(rbeta(600,4,1), nrow = n0)
##
## library(ggplot2)
## ab.names=c(paste("g",1:p,sep=""),paste("g' ",1:p,sep=""))
## pl0=ggplot(data.frame(ab = c(AB0), ID = rep(ab.names, each = n0), Z = z0),
## aes(x = Z, y = ab)) +
## geom_point() +
## facet_wrap(~ID, scales = "free",nrow = 2, ncol = p) + theme_bw() +
## ggtitle("Unpenalized VCM",expression(nabla~"g and "~"g")) + ylab("")
##
## I want to switch the rows and add the nabla symbol in the title of the plots in the 1st row before switching them to the second row. To be clear, the 1st row is for the functions, and the 2nd row is for the gradients where the nabla symbol should appear in the title.
## Here is a screenshot of the outcome |0 |0 |9 |NA |NA |
## | 76391426|7 |0 |r,amazon-web-services,docker,aws-lambda,dockerfile |2023-06-02 15:14:08Z |user_123 | 2588072|98 |This is my current error when testing my lambda function through the AWS web interface. I'm unable to find the libraries to use the function
## {
## "error_message": "Error in library(aws.s3): there is no package called ‘aws.s3’\n"
## }
##
## OpenBLAS WARNING - could not determine the L2 cache size on this system, assuming 256k
## INFO [2023-06-02 14:43:28] Handler found: process_landsat_temp.process
## INFO [2023-06-02 14:43:28] Using function process from process_landsat_temp.R
## INFO [2023-06-02 14:43:28] Querying for events
## START RequestId: 70264185-8846-4b70-9399-dfe8e58ed066 Version: $LATEST
## ERROR [2023-06-02 14:43:28] Error in library(aws.s3): there is no package called ‘aws.s3’
## END RequestId: 70264185-8846-4b70-9399-dfe8e58ed066
##
## This is my dockerfile to create the container image. I've read that lambda expects libraries to be installed at /opt/lib, and I was setting that using the R_LIBS_USER.
## FROM public.ecr.aws/lambda/provided:al2
##
## ENV R_VERSION=4.3.0
##
## RUN yum -y install amazon-linux-extras && \
## yum clean all
##
## RUN amazon-linux-extras install kernel-5.15 && \
## yum clean all
##
## RUN yum -y install wget-1.14 && \
## yum -y install glib2-2.56.1-9.amzn2.0.3 && \
## yum -y install libssh2-1.4.3-12.amzn2.2.4 && \
## yum clean all
##
## RUN yum -y install https://dl.fedoraproject.org/pub/epel/epel-release-latest-7.noarch.rpm \
## && wget --no-verbose https://cdn.rstudio.com/r/centos-7/pkgs/R-${R_VERSION}-1-1.x86_64.rpm \
## && yum -y install R-${R_VERSION}-1-1.x86_64.rpm \
## && rm R-${R_VERSION}-1-1.x86_64.rpm && \
## yum clean all
##
## ENV PATH="${PATH}:/opt/R/${R_VERSION}/bin/"
##
## RUN yum -y install openssl-devel && \
## yum install -y \
## openldap-2.4.44-25.amzn2.0.5 && \
## yum clean all && rm -rf /var/lib/apt/lists/*
##
## ENV R_LIBS_USER=/opt/lib
##
## RUN Rscript -e "install.packages(c('aws.s3','httr', 'jsonlite', 'logger', 'logging', 'sp', 'raster', 'parallel'), INSTALL_opts=c('--no-docs', '--no-help','--no-html', '--no-multiarch', '--no-test-load'), dependencies=TRUE, repos=c('https://cloud.r-project.org/','https://artifactory.wma.chs.usgs.gov/artifactory/r-cran-mirror/','https://RForge.net'))"
##
## #copy scripts to working directory
## COPY runtime.R bootstrap.R process_landsat_temp.R testFunction.R ${LAMBDA_TASK_ROOT}/
## RUN chmod 755 -R "${LAMBDA_TASK_ROOT}"
##
## RUN printf '#!/bin/sh\ncd /var/task\nRscript runtime.R' > /var/runtime/bootstrap \
## && chmod +x /var/runtime/bootstrap
##
##
## This my runtime.R script, which seems to be able to use the libraries now problem. I've tried changing the library variable to be in the same location as the runtime (/var/runtime) but that didn't work
## library(httr)
## library(logger)
## log_formatter(formatter_paste)
## log_threshold(INFO)
##
## #' Convert a list to a single character, preserving names
## #' prettify_list(list("a" = 1, "b" = 2, "c" = 3))
## #' # "a=5, b=5, c=5"
## prettify_list <- function(x) {
## paste(
## paste(names(x), x, sep = "="),
## collapse = ", "
## )
## }
##
## # error handling with http codes
## # from http://adv-r.had.co.nz/Exceptions-Debugging.html
## condition <- function(subclass, message, code, call = sys.call(-1), ...) {
## structure(
## class = c(subclass, "condition"),
## list(message = message, code = code, call = call),
## ...
## )
## }
## stop_api <- function(message, code = 500, call = sys.call(-1), ...) {
## stop(condition(c("api_error", "error"), message, code = code, call = call,
## ...))
## }
##
## log_debug("Deriving lambda runtime API endpoints from environment variables")
## lambda_runtime_api <- Sys.getenv("AWS_LAMBDA_RUNTIME_API")
## if (lambda_runtime_api == "") {
## error_message <- "AWS_LAMBDA_RUNTIME_API environment variable undefined"
## log_error(error_message)
## stop(error_message)
## }
## next_invocation_endpoint <- paste0(
## "http://", lambda_runtime_api, "/2018-06-01/runtime/invocation/next"
## )
## initialisation_error_endpoint <- paste0(
## "http://", lambda_runtime_api, "/2018-06-01/runtime/init/error"
## )
##
## tryCatch(
## {
## log_debug("Determining handler from environment variables")
## handler <- Sys.getenv("_HANDLER")
## if (is.null(handler) || handler == "") {
## stop_api("_HANDLER environment variable undefined")
## }
## log_info("Handler found:", handler)
## handler_split <- strsplit(handler, ".", fixed = TRUE)[[1]]
## file_name <- paste0(handler_split[1], ".R")
## function_name <- handler_split[2]
## log_info("Using function", function_name, "from", file_name)
##
## log_debug("Checking if", file_name, "exists")
## if (!file.exists(file_name)) {
## stop_api(file_name, " doesn't exist in ", getwd())
## }
## source(file_name)
##
## log_debug("Checking if", function_name, "is defined")
## if (!exists(function_name)) {
## stop_api("Function name ", function_name, " isn't defined in R")
## }
## log_debug("Checking if", function_name, "is a function")
## if (!is.function(eval(parse(text = function_name)))) {
## stop_api("Function name ", function_name, " is not a function")
## }
## },
## api_error = function(e) {
## log_error(as.character(e))
## POST(
## url = initialisation_error_endpoint,
## body = list(
## statusCode = e$code,
## error_message = as.character(e$message)),
## encode = "json"
## )
## stop(e)
## }
## )
##
## handle_event <- function(event) {
## status_code <- status_code(event)
## log_debug("Status code:", status_code)
## if (status_code != 200) {
## stop_api("Didn't get status code 200. Status code: ", status_code,
## code = 400)
## }
## event_headers <- headers(event)
##
## # HTTP headers are case-insensitive
## names(event_headers) <- tolower(names(event_headers))
## log_debug("Event headers:", prettify_list(event_headers))
##
## aws_request_id <- event_headers[["lambda-runtime-aws-request-id"]]
## if (is.null(aws_request_id)) {
## stop_api("Could not find lambda-runtime-aws-request-id header in event",
## code = 400)
## }
##
## # According to the AWS guide, the below is used by "X-Ray SDK"
## runtime_trace_id <- event_headers[["lambda-runtime-trace-id"]]
## if (!is.null(runtime_trace_id)) {
## Sys.setenv("_X_AMZN_TRACE_ID" = runtime_trace_id)
## }
##
## # we need to parse the event in four contexts before sending to the lambda fn:
## # 1a) direct invocation with no function args (empty event)
## # 1b) direct invocation with function args (parse and send entire event)
## # 2a) api endpoint with no args (parse HTTP request, confirm null request
## # element; send empty list)
## # 2b) api endpoint with args (parse HTTP request, confirm non-null request
## # element; extract and send it)
##
## unparsed_content <- httr::content(event, "text", encoding = "UTF-8")
## # Thank you to Menno Schellekens for this fix for Cloudwatch events
## is_scheduled_event <- grepl("Scheduled Event", unparsed_content)
## if(is_scheduled_event) log_info("Event type is scheduled")
## log_debug("Unparsed content:", unparsed_content)
## if (unparsed_content == "" || is_scheduled_event) {
## # (1a) direct invocation with no args (or scheduled request)
## event_content <- list()
## } else {
## # (1b, 2a or 2b)
## event_content <- jsonlite::fromJSON(unparsed_content)
## }
##
## # if you want to do any additional inspection of the event body (including
## # other http request elements if it's an endpoint), you can do that here!
##
## # change `http_req_element` if you'd prefer to send the http request `body` to
## # the lambda fn, rather than the query parameters
## # (note that query string params are always strings! your lambda fn may need to
## # convert them back to numeric/logical/Date/etc.)
## is_http_req <- FALSE
## http_req_element <- "queryStringParameters"
##
## if (http_req_element %in% names(event_content)) {
## is_http_req <- TRUE
## if (is.null(event_content[[http_req_element]])) {
## # (2a) api request with no args
## event_content <- list()
## } else {
## # (2b) api request with args
## event_content <- event_content[[http_req_element]]
## }
## }
##
## result <- do.call(function_name, event_content)
## log_debug("Result:", as.character(result))
## response_endpoint <- paste0(
## "http://", lambda_runtime_api, "/2018-06-01/runtime/invocation/",
## aws_request_id, "/response"
## )
## # aws api gateway is a bit particular about the response format
## body <- if (is_http_req) {
## list(
## isBase64Encoded = FALSE,
## statusCode = 200L,
## body = as.character(jsonlite::toJSON(result, auto_unbox = TRUE))
## )
## } else {
## result
## }
## POST(
## url = response_endpoint,
## body = body,
## encode = "json"
## )
## rm("aws_request_id") # so we don't report errors to an outdated endpoint
## }
##
## log_info("Querying for events")
## while (TRUE) {
## tryCatch(
## {
## event <- GET(url = next_invocation_endpoint)
## log_debug("Event received")
## handle_event(event)
## },
## api_error = function(e) {
## log_error(as.character(e))
## aws_request_id <-
## headers(event)[["lambda-runtime-aws-request-id"]]
## if (exists("aws_request_id")) {
## log_debug("POSTing invocation error for ID:", aws_request_id)
## invocation_error_endpoint <- paste0(
## "http://", lambda_runtime_api, "/2018-06-01/runtime/invocation/",
## aws_request_id, "/error"
## )
## POST(
## url = invocation_error_endpoint,
## body = list(
## statusCode = e$code,
## error_message = as.character(e$message)),
## encode = "json"
## )
## } else {
## log_debug("No invocation ID!",
## "Can't clear this request from the queue.")
## }
## },
## error = function(e) {
## log_error(as.character(e))
## aws_request_id <-
## headers(event)[["lambda-runtime-aws-request-id"]]
## if (exists("aws_request_id")) {
## log_debug("POSTing invocation error for ID:", aws_request_id)
## invocation_error_endpoint <- paste0(
## "http://", lambda_runtime_api, "/2018-06-01/runtime/invocation/",
## aws_request_id, "/error"
## )
## POST(
## url = invocation_error_endpoint,
## body = list(error_message = as.character(e)),
## encode = "json"
## )
## } else {
## log_debug("No invocation ID!",
## "Can't clear this request from the queue.")
## }
## }
## )
## }
##
## This is a simplified version my function process_landsat_temp.R. The full version requires these libraries to do some image processing within an AWS s3 bucket
## process <- function(number) {
## library(aws.s3)
## library(httr)
## library(jsonlite)
## library(sp)
## library(raster)
## library(parallel)
##
## list(parity = if (as.integer(number) %% 2 == 0) "even" else "odd")
## } |0 |0 |10 |NA |NA |
## | 76391151|37 |0 |r,merge,tidyverse |2023-06-02 14:42:16Z |GaB | 7425379|1,058 |How program to join 2 different tables based on which one has the highest number of rows, with tidyverse? Now, the total_number_views_ndinstict has only 8 but in the future this may have more rows than the second total_number_views_unique_na which currently has 10 rows. I need both columns in the joined table.
## Here is the first table:
## > total_number_views_ndinstict
## # A tibble: 8 × 2
## app_name n_distinct_users
## <chr> <int>
## 1 animals_to_groups 2
## 2 cage_randomiser 5
## 3 combo_cor 1
## 4 crispr_screen_viz 21
## 5 dep_map_bem 4
## 6 growth_rate_explorer 3
## 7 moprospector 2
## 8 translatability_single_gene 17
##
## And the second table is
## > total_number_views_unique_na
## # A tibble: 10 × 2
## app_name users_na
## <chr> <int>
## 1 animals_to_groups 21
## 2 cage_randomiser 14
## 3 combo_cor 14
## 4 crispr_screen_viz 1
## 5 dep_map_bem 0
## 6 dtp_browser_prod 6
## 7 flat 81
## 8 growth_rate_explorer 48
## 9 moprospector 0
## 10 translatability_single_gene 2
##
## Can someone help? |2 |15 |29 |NA |NA |
## | 76391100|22 |0 |r,dataframe,sum,cumulative-sum |2023-06-02 14:34:13Z |MKN17 | 18638709|27 |Consider this df, where Percent is the percentage of sales that the person accounted for in the entire group. The Total/Percent column is the sum of values in the Percent column up to the current row. I've tried using the roll_sum() function from the RcppRoll library however it didn't work as it was only going back 1 window at a time. Is there a way to do this in R without using a for loop? |0 |0 |6 |NA |NA |
## | 76391093|32 |0 |r |2023-06-02 14:33:00Z |Cato | 22006017|1 |I am a complete beginner so bare with me :D
## I did an experiment where I noted the number of mature animals for my ten different treatments on day 4 to day 12. I now want to see whether there is a difference in this increase, so whether a certain treatment might see animals mature faster than others. Per treatment I have six replicates.
## So the idea is to make a graph where i see the increasing number of mature animals from day 4 to day 12, with one regression line for each treatment. I then want to compare to see whether they actually differ, probably by using the slope?
## So my data looks like this:
## Treatment Replicate Mature_D4 ... Mature_D12
##
## Control 1 0 12
## Control 2 0 12
## Control 3 1 11
## Control 4 0 9
## Control 5 1 8
## Control 6 0 12
## Treatment 1 1 0 7
## ...
## Treatment 9 6 1 8
##
## I have a lot of NAs and clearly left-skewed data.
## I thought about a glm() because it is count data. I am wondering if I could do all treatments in one glm() but don't know how to extract the slope for each treatment. Or I could do a glm() for each treatment seperately but then I need to check the assumptions for each, and what if they are not met or I need another model? Is it a bad idea to compare the slope of different models? |0 |0 |0 |margusl |2023-06-02 14:51:29Z |
## | 76390879|28 |2 |r,shiny,shiny-server,sockjs |2023-06-02 14:06:09Z |Nick L | 11251650|31 |I currently run my RShiny apps in AWS ECS in containers I build with a Dockerfile using the rocker/shiny base image.
## I deploy my applications in a test and a prod environment. The prod environment has more WAF rules that I must follow to be compliant with my organization's standards.
## The apps work in test but fail in prod. I have learned that the SockJS POST is being blocked by the Prod WAF because of its "content-type header is NULL" policy.
## Taking a closer look, in production, the keep-alive POST requests coming from SockJS have no response headers, causing the POST to fail and the app to continuously retry to establish a connection, only to be blocked over and over.
##
## In the working test environment, I can see that these keep-alive requests start with 0 response headers:
## And 25 seconds later, the request is fulfilled with a 200 response and a new request is open. The fulfilled request now has 19 response headers including content-type.
##
## This repeats every 25 seconds. I see in the shiny-server docs that there is a shiny-server.conf parameter sockjs_heartbeat_delay that defaults to 25 seconds.
## Unfortunately this is where my knowledge drops off. The questions I hope to answer are:
## Am I correct in thinking that it could be possible to add the content-type header to these SockJS requests before they resolve?
## Is there a shiny-server.conf parameter that can be used to add this header, remove these keep-alive requests, or use a different protocol to xhr that works differently with these heartbeat requests?
## Am I able to edit the source code of shiny-server or SockJS from within my rocker/shiny docker container? If so, where can I add a line to add the content-type header to my request?
## Environment specs:
## Base image rocker/shiny:latest
## Shiny Server v1.5.21.1004
## Node.js v16.18.1
## Ubuntu 20.04.5
## Update #1:
## I am reading in the shiny-server docs and the disable_protocols parameter says that disabling websocket and streaming should leave only polling left, which should work with most load balancers. After looking into the definition of streaming vs polling, I wonder if the nature I described in my test environment is streaming. I confirmed that streaming is disabled in the test environment, so I wonder if shiny-server is not correctly disabling xhr-streaming. |0 |0 |3 |Nick L |2023-06-02 18:37:31Z |
## | 76390817|16 |0 |r |2023-06-02 13:59:26Z |Kate Vasi | 21955371|1 |This question was migrated from Stack Overflow because it can be answered on Cross Validated.
## Migrated yesterday.
##
##
##
##
##
##
##
## I conducted a content analysis on social media posts regarding CEO communication about sociopolitical topics. I have two levels of analysis entities: 1) the social media account of a CEO, 2) the social media posts per account.
## One question I wanted to examine (on the level of 1) social media accounts) is whether the affiliation of an account to a specific industry does correlate with the number of sociopolitical posts - in other words, do the CEO social media accounts of specific industries have more sociopolitical posts than others?
## I conducted a one-way ANOVA with "industry of account" as my independent variable, which is a nominal variable (codes "1" to "7"), and the "number of sociopolitical posts" as my explained variable, which is continuous (no codes, but frequency numbers, ranges from 1 to 76).
## That's the code I used:
## data$industry <- as.factor(data$industry)
## anova <- aov(post_sp ~ industry, data = data)
## etaSquared(anova, anova = TRUE)
## pairwise.t.test(data$post_sp, data$industry)
##
## Everything worked well except for the pairwise Posthoc test, I didn't receive any data. That's when I asked myself if it's even possible to conduct an ANOVA with the variable "number of sociopolitical posts", or if I have to recode it somehow. |0 |0 |0 |Kate Vasi |2023-06-03 10:28:28Z |
## | 76390798|42 |-2 |r,subset |2023-06-02 13:56:16Z |Olivia Orr | 22007928|1 |Trying to subset a dataset by column name and I keep getting an error that undefined columns selected.
## This is the code I'm using:
## df2 <- wide_data[,c("famID", "i1", "i2", "s1", "s2")]
##
## I'm unsure what I'm doing wrong becuase I made sure everything is spelled correctly! |0 |0 |0 |jpsmith |2023-06-02 14:00:41Z |
## | 76390501|20 |-1 |r,tar |2023-06-02 13:14:00Z |Julia K | 22007783|1 |I know how to untar a file, but I want to keep all of the infomation into one file not 12 different files after it is untar'ed. I use a MacBook.
## I want to compute the data into R so I need the untared files to be in one file. Or will R analyze many files and imput it into one table once the metadata is run. I have the latest version of RStudio |0 |0 |0 |Phil |2023-06-02 14:18:36Z |
## | 76390478|32 |0 |r,lubridate,survival-analysis |2023-06-02 13:11:20Z |ZheFrench | 602268|1,165 |From a survival study, I have the following pre-computed interval of times as months for overall survival :
## 25.733333 1.000000 1.933333 2.800000 2.866667
##
## I would like to import them in lubridate to convert them as days.
## Usually I directly work with two dates and I compute as what follows :
## dataframe.final$Diff.days <- interval(dataframe.final$Date.First.Immunotherapy , dataframe.final$Date.Death.Or.Last.Contact) %>% as.numeric('days')
##
## But in this case, I have only the interval given as a decimal number. |3 |21 |46 |NA |NA |
## | 76390426|30 |0 |r |2023-06-02 13:05:06Z |Miquel | 15760316|57 |This question already has answers here:
##
##
##
##
##
##
## How can I format axis labels with exponents with ggplot2 and scales?
##
## (7 answers)
##
##
## Closed yesterday.
##
##
##
## I have a problem with the y-axis format with ggplot.
## In the experiment we analyzed the contaminant from two sites (A and B). I would like to have the y-axis with scientific annotation, however, I am unable to obtain it.
## This is the code I made:
## library(ggplot)
## #create my dataset
## a<-c("4000","7500","4000","102000","14500","150000")
## b<-c("A","A","A","B","B","B")
## data<-data.frame(cbind(a,b))
## data$b<-as.factor(data$b)
## data$a<-as.integer(data$a)
## #plot the data, with scientific annotation
## scientific_10 <- function(x) { parse(text=gsub("e\\+*", " %*% 10^", scales::scientific_format()(x))) }
## A<-ggplot(data, aes(x = b, y = a, fill = b)) +
## geom_boxplot() +
## labs(y= "ppm", x = "contaminant")+
## geom_point(shape = 21, position = position_jitterdodge(jitter.width = 0))
## A<-A+ scale_y_continuous(label=scientific_10)
## A
##
## The problem is that I don't know how to write 0 at the first value. The other problem is that is not written the "x" symbol in the mathematical expression. Any idea?
## I attach a picture that I have obtained.
##
## Thank you very much! |0 |0 |4 |Miquel |2023-06-02 14:31:37Z |
## | 76390374|12 |0 |r,web-crawler,clientid |2023-06-02 12:59:25Z |Subeen Choi | 22005477|1 |i try to crawl some data from news using naver client id and password. however, it turns out that my client id is not valiable. can anybody solve this problem? thanks!
## library(httr)
## news <- GET(url = 'https://openapi.naver.com/v1/search/news.json?',
## add_headers('X-Naver-Client-Id' = 'XXXXXXXXXXXXXXXXXXX',
## 'X-Naver-Client-Secret' = 'XXXXXXXXXXXXXXXXXXX'),
## query = list(query = 'korean characters',
## display = 100,
## start = 1,
## sort = 'date'))
## content(news)
##
## $errorMessage
## [1] "NID AUTH Result Invalid (1000) : Authentication failed. (인증에 실패했습니다.)"
##
## $errorCode
## [1] "024"
##
## i tried to use another url and client id but it still didn't work,, |0 |0 |0 |Phil |2023-06-02 14:20:14Z |
## | 76390329|42 |3 |r,ggplot2,density-plot |2023-06-02 12:52:36Z |M. Beausoleil | 4224718|3,077 |I have data plotted as points and would like to add density plots to the graph. The marginal plot solutions from ggExtra or other packages are not giving the freedom that I'd like and so want to generate the density plot at the same time as the ggplot.
## df = data.frame(x = rnorm(50, mean = 10),
## y = runif(50, min = 10, max = 20),
## id = rep(LETTERS[1:5], each = 10))
## ggppp = ggplot(data = df, mapping = aes(x, y, color = id)) +
## geom_point() + theme_bw()
##
## ggppp +
## geom_density(mapping = aes(y = y,
## col = id),
## inherit.aes = FALSE, bounds = c(-Inf, Inf)) +
## geom_density(mapping = aes(x = x,
## col = id),
## inherit.aes = FALSE, )
##
##
## Is there a way to move the density plots to other values of x or y position (like moving the density lines to the tip of the arrow in the image below)? |6 |28 |61 |zx8754 |2023-06-02 20:47:45Z, 2023-06-02 14:16:30Z |
## | 76390299|23 |0 |r,tapply |2023-06-02 12:48:55Z |Oli Ck | 8675311|70 |I would like to use the names of the INDEX factor in my FUN function in tapply.
## My data and function are more complex but here is a simple reproducible example :
## data <- data.frame(x <- c(4,5,6,2,3,5,8,1),
## name = c("A","B","A","B","A","A","B","B"))
## myfun <- function(x){paste("The mean of NAME is ", mean(x))}
## tapply(data$x, data$name, myfun)
##
## Result :
## A B
## "The mean of NAME is 4.5" "The mean of NAME is 4"
##
## Where I would like NAME to be A or B. |0 |0 |5 |NA |NA |
## | 76390196|17 |0 |r |2023-06-02 12:34:44Z |EDOARDO MUSCI | 22007526|1 |I have a dataset 1200*10, I wanto to apply a sum using this sliding window function:
## slide_apply <- function (data, window, step , fun)
## slide_apply <- function (data, window, step , fun)
## {
## fun <- match.fun(fun)
## total <- length(data)
## window <- abs(window)
## spots <- seq(from = 1, to = (total - window + 1), by = abs(step))
## result <- rep(NA, length(spots))
## for (i in 1:length(spots)) {
## result[window + i - 1] <- fun(data[spots[i]:(spots[i] +
## window - 1)])
## }
## return(result)
## }
##
## but as a result I get:
## Error in seq.default(from = 1, to = (total - window + 1), by = abs(step)) :
## segno sgbagliato nell'argomento 'by'
## 4.
## stop("wrong sign in 'by' argument")
## 3.
## seq.default(from = 1, to = (total - window + 1), by = abs(step))
## 2.
## seq(from = 1, to = (total - window + 1), by = abs(step))
## 1.
## slide_apply(data = ret, window = 1200, step = 1, fun = sum)
##
## I can't get the problem...
## I tried to translate the input dataframe, and it works but I obtain a 12000 rows vector..so I guess something is wrong |0 |0 |0 |EDOARDO MUSCI |2023-06-02 12:36:34Z |
## | 76389748|12 |0 |r,statistics,r-lavaan,psych |2023-06-02 11:29:06Z |Isaiah Hipel | 22007010|1 |I am a psychology graduate student and I am trying to get the required statistics for my bifactor model according to APA. I was able to get the model to converge with some modifications to item loadings (there are 22 items total) using the step by Arndt Regorz. I have about 300 observations but the data may have some missing values. Any input on this would be greatly appreciated. I have copied and pasted the code I have below:
## #Create a model for bifactor analysis
## model_bif <- '
##
## #Define the general factor
## g =~ DTW_1 + DTW_2 + DTW_3 + DTW_6 + DTW_7 + DTW_8 + DTW_9 + DTW_10 + DTW_11 + DTW_12 + DTW_13 + DTW_14 + DTW_15 + DTW_16 + DTW_17 + DTW_18 + DTW_19 + DTW_20 + DTW_21 + DTW_22
##
## #Define the specific factors
## n =~ DTW_1 + DTW_2 + DTW_3 + DTW_4 + DTW_5 + DTW_6
## m =~ DTW_7 + DTW_8 + DTW_9 + DTW_10
## s =~ DTW_17 + DTW_19 + DTW_21
##
## '
##
## #Fit the model
## fit_bif <- cfa(model_bif, data = dataset, orthogonal = TRUE)
## summary(fit_bif, fit.measures = TRUE, standardized = TRUE)
##
## #build a path diagram
## semPaths(fit_bif, residuals=F,sizeMan=7,"std",
## posCol=c("skyblue4", "red"),
## #edge.color="skyblue4",
## edge.label.cex=1.2,layout="circle2")
##
## I have tried using the psych package but can't get the omega function to work. It continues to say "Error in m$n.obs : $ operator is invalid for atomic vectors". |0 |0 |0 |NA |NA |
## | 76389710|19 |0 |r,if-statement,conditional-statements,raster,spatial |2023-06-02 11:22:12Z |user18791303 | 18791303|3 |I have to assign benchmarks for soil organic carbon values based on three informations:
## Land use type: cropland or grassland? (this information is provided in a shape file, where the fields are given as polygons). Not the entire study area is cropland or grassland, and I only want to apply the calculations to the polygons where this condition is met.
## Soil texture (this info is both available as raster (100mx100m) grid or as shape file. I thought it is easier for the calculations if I use it in raster format)
## Carbon value (raster file in 100x100m resolution)
## I am not experienced with spatial data calculations, and with data tables I would just create a nested for loop with all my conditions. However, I don't know how to do this creating a new raster file and 'accessing' the information of both the shape file (land use type) AND soil texture and carbon value rasters.
## All files have the same coordinate system and are overlapping.
## To make it short, for a similar task I have used somewhat this type of loop with conditional statements:
## for(i in nrow(new_raster)){
## if(landuse_shp[i] == 'AL'){
## if(texture_tif [i] == 'light'){
## if(carbon_tif[i] >= 0.68 && carbon_tif[i] <= 1.23){
## new_raster[i] <- "1"
## }
## else if(carbon_tif[i] > 1.23){
## new_raster[i] <- "2"
## }
## #and so on...
## }
## if(texture_tif[i] == 'middle I'){
## if(carbon_tif[i] < 2.4) {
## new_raster[i] <- "0"
## }
## else if(...)
##
##
## This is rather a conceptual question, because I do not have any code yet, as I don't know which first steps would make most sense...
## Thanks a lot for you help &cheers |0 |0 |3 |NA |2023-06-02 13:33:45Z |
## | 76389697|41 |1 |r,image,ocr,data-cleaning |2023-06-02 11:20:41Z |zachi | 7862953|473 |I am trying to scan a text from an Ocr and clean it, I got a character that is divided to few lines, however I would like to have the text in similar to the way it is in the image
## the code :
## heraclitus<-"greek.png"
## library(tidyverse)
## library(tesseract)
## library(magick)
##
## image_greek<-image_read(heraclitus)
##
## image_greek<-image_greek %>% image_scale("600") %>%
## image_crop("600x400+220+150") %>%
## image_convert(type = 'Grayscale') %>%
## image_contrast(sharpen = 1) %>%
## image_write(format="jpg")
##
## heraclitus_sentences<-magick::image_read(image_greek)%>%
## ocr() %>% str_split("\n")
##
## As you can see from the output, I have white spaces and sentences that are divided to two lines. I would like to have it in a vector or a list, that each element will be a sentence |0 |4 |13 |jpsmith |2023-06-02 11:31:21Z |
## | 76389613|20 |0 |r,loops,matrix,indexing,append |2023-06-02 11:07:28Z |s28 | 21573410|29 |I loop through a matrix and I would like to append a matrix row to another empty matrix if a specific condition is met. How do I do this without getting problems with the different indexes?
## I had this code, but my dataset is very large, so I get problems in the implementation
## for (i in 1:length(matrix1)) {
## if ((substr(matrix1[i,1],6,7) == '02') == TRUE) {
## for (j in 1:nrow(matrix2)) {
## matrix2[j,] <- matrix1[i,]
## }
## }
## }
##
## Is there a more efficient solution?
## dput(matrix1[1]) is c("271", "269", "274", "278", "293", "270", "274", "274", "275", "271", "2018-01-03_0445")
## nrow(matrix1) is 400000 |0 |0 |5 |s28 |2023-06-02 11:29:58Z |
## | 76389515|25 |0 |r,if-statement,alphanumeric |2023-06-02 10:53:26Z |Naomi | 7655794|297 |This is my df:
## df <- data.frame(id=as.integer(c(1:6)),
## code=as.character(c("C410", "D486", "D485", "D501", "D600", "D899")))
## df
## id code
## 1 1 C410
## 2 2 D486
## 3 3 D485
## 4 4 D501
## 5 5 D600
## 6 6 D899
##
## I want to attribute causes to each id depending on the range they fall into in column 2. For this, I use a ifelse statement:
## df$cause <- ifelse(df$code >= "C00" & df$code <= "D48", "cause 1",
## ifelse(df$code >= "D50" & df$code <= "D89", "cause 2", NA))
##
## Issue: the algorithm does not capture values above the end of each range (until the maximum possible value)
## df
## id code cause
## 1 1 C410 cause 1
## 2 2 D486 <NA>
## 3 3 D485 <NA>
## 4 4 D501 cause 2
## 5 5 D600 cause 2
## 6 6 D899 <NA>
##
## Desired output:
## df
## id code cause
## 1 1 C410 cause 1
## 2 2 D486 cause 1
## 3 3 D485 cause 1
## 4 4 D501 cause 2
## 5 5 D600 cause 2
## 6 6 D899 cause 2 |0 |2 |11 |NA |NA |
## | 76389510|25 |0 |r,dataframe |2023-06-02 10:52:53Z |Usman YousafZai | 1535580|1,018 |This question already has answers here:
##
##
##
##
##
##
## How can I assign a value using if-else conditions in R
##
## (2 answers)
##
##
## Closed yesterday.
##
##
##
## I want to add one a column called "Opt-Numbers" to my data frame with the following values Opt-CMM and Opt-MM based on Numbers column. If the value in Numbers column are greater or equal to 4 then it should add Opt-CMM in the same row of that value or if it is less than 4 then add Opt-MM in the same row. I am also showing an example in below df.
## Given DF.
##
## S.NO
## Numbers
## P1
## 2
## P2
## 5
## P3
## 2
## P4
## 2
## P5
## 3
## P6
## 4
##
## Required DF
##
## S.NO
## Numbers
## Opt-Numbers
## P1
## 2
## Opt-MM
## P2
## 5
## Opt-CMM
## P3
## 2
## Opt-MM
## P4
## 2
## Opt-MM
## P5
## 3
## Opt-MM
## P6
## 4
## Opt-CMM |4 |17 |43 |stefan |2023-06-02 11:00:31Z |
## | 76389468|23 |0 |r,dplyr |2023-06-02 10:47:23Z |psysky | 4529548|3,017 |i have such data example
## mydata=structure(list(month_id = c(201206L, 201206L, 201207L, 201207L,
## 201306L, 201306L, 201307L, 201307L, 201406L, 201406L, 201407L,
## 201407L, 201506L, 201506L, 201507L, 201507L, 201606L, 201606L,
## 201607L, 201607L, 201706L, 201706L, 201707L, 201707L), MDM_Key = c(1L,
## 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L,
## 2L, 1L, 2L, 1L, 2L, 1L, 2L), sale_count = c(6978517L, 13957034L,
## 6148636L, 12297272L, 6147466L, 12294932L, 6050044L, 12100088L,
## 8127548L, 16255096L, 3341936L, 6683872L, 8995701L, 17991402L,
## 6803563L, 13607126L, 7098546L, 14197092L, 7855146L, 15710292L,
## 5575453L, 11150906L, 7543770L, 15087540L)), class = "data.frame", row.names = c(NA,
## -24L))
##
## I need for each group (mdm_key) to get the difference between the months (07-06) by sale_count Variable. The data contains histories by year. e.g. 06(june) and 07(jule) monthes for 2012,
## 06 and 07 for 2013 and so on...
## For each year and each mdm_key I need to subtract the sale_count from month 7 to month 6.
## So that the desired result for each year and each group mdm_key looks something like this
## year MDM_Key sale_count
## 1 2012 1 -829881
## 2 2013 1 -97422
## 3 2014 1 -4785612
## 4 2015 1 -2192138
## 5 2016 1 756600
## 6 2017 1 1968317
## 7 2012 2 -1659762
## 8 2013 2 -194844
## 9 2014 2 -9571224
## 10 2015 2 -4384276
## 11 2016 2 1513200
## 12 2017 2 3936634
##
## What is the easiest way to do such a subtraction?
## Thanks for your any help. |5 |28 |62 |NA |NA |
## | 76389386|20 |1 |r,dplyr,distinct,summarize |2023-06-02 10:36:18Z |Sanna | 22006219|11 |very grateful for your help trying to group/collapse rows of all columns (all but the two columns I use to group_by) and would like to exclude duplicated strings in the merge (only keep distinct strings and numbers). The df has many many more columns but here is a toy example.
## Is there a way to only keep distinct strings when using summarize_all?
## df = data.frame(
## + variant = c("1-12345-GT", "1-12345-GT", "2-3456-C-T", "3-45567-C-A", "4-566879-C-T", "4-566879-C-T"),
## ID=c("A", "A", "B", "C", "D", "D"),
## value1=c( "GJB1", "GJB1", "TBC", "TZY", "FBY", "FBY"),
## value2=c( 0.5, 1, 0.5, 0.5, 0.5, 0.5))
##
## df
## variant ID value1 value2
## 1 1-12345-GT A GJB1 0.5
## 2 1-12345-GT A GJB1 1.0
## 3 2-3456-C-T B TBC 0.5
## 4 3-45567-C-A C TZY 0.5
## 5 4-566879-C-T D FBY 0.5
## 6 4-566879-C-T D FBY 0.5
##
##
## I tried
## df %>% group_by(variant, ID) %>% summarise_all(~(toString(na.omit(.))))
##
##
## A tibble: 4 Ã 4
## Groups: variant [4]
## variant ID value1 value2
## <chr> <chr> <chr> <chr>
## 1 1-12345-GT A GJB1, GJB1 0.5, 1
## 2 2-3456-C-T B TBC 0.5
## 3 3-45567-C-A C TZY 0.5
## 4 4-566879-C-T D FBY, FBY 0.5, 0.5
##
##
## I would like row 1 GJB1 and row 4 FBY and 0.5 to not be duplicated. But I would like to keep both unique values at row 1: 0.5, 1.
## I tried splitting the rows. But this does not eliminate GJB1 duplicates since the rows will still be unique.
## df %>% separate_rows(value1, value2) %>% unique() %>% group_by(variant, ID) %>% summarise_all(~(toString(na.omit(.)))) %>% ungroup()
##
##
## A tibble: 4 Ã 4
## variant ID value1 value2
## <chr> <chr> <chr> <chr>
## 1 1-12345-GT A GJB1, GJB1 0.5, 1
## 2 2-3456-C-T B TBC 0.5
## 3 3-45567-C-A C TZY 0.5
## 4 4-566879-C-T D FBY 0.5
##
## Thinking there must be a function only to keep distinct strings when using summarize_all. Thank you! |0 |0 |2 |Sanna |2023-06-02 12:27:45Z |
## | 76389243|45 |0 |r,bash,commandargument |2023-06-02 10:12:52Z |guillaume | 22006375|1 |i'm currently running a r program where i use already 9 argument with "commandArgs()" for my bash script, but it seems i can't use a 10th argument, the program doesn't take it, is there a way i could do it and won't have to make an other program just for that ? thanks in advance for your help ! |0 |0 |0 |NA |NA |
## | 76389056|25 |-1 |r |2023-06-02 09:48:10Z |user15079730 | 15079730|1 |Closed. This question needs details or clarity. It is not currently accepting answers.
##
##
##
##
##
##
##
##
##
##
##
## Want to improve this question? Add details and clarify the problem by editing this post.
##
##
## Closed yesterday.
##
##
##
##
##
##
##
##
## Improve this question
##
##
##
##
## How to achieve the same as the following code (python) in R, can not find the right functions
##
##
## for achieving k-anonymity
## thanks in advance
## tried to groupby, but could not achieve the reset index and bins |0 |0 |0 |Phil |2023-06-02 14:19:46Z |
## | 76389050|50 |0 |r |2023-06-02 09:47:29Z |Albert | 12378480|119 |Consider the following dataset
## mydata<-data.frame(id = c("R007", "R008", "R008", "R009", "R009"),
## statenumber= c(1, 2, 3, 4, 5),
## startdate = c(20080101, 20080101, 20120301,20120101, 20121001),
## enddate = c(20121201, 20120301, 20121201, 20121001, 20121201))
##
## #if necessary we can write the dates as dates
## mydata$startdate<-as.Date(as.character(mydata$startdate), "%Y%m%d")
## mydata$enddate<-as.Date(as.character(mydata$enddate), "%Y%m%d")
##
## The explanation of the dataset is as follows. A person with id number R007 lives during 2008 and 2012 at the same address. Person R008 lives during 2008 and 2012 at two addresses (state number). So for the years 2008-2011, he lives at address 2, and for the year 2012 he lives at address 3. Person R009 is only available in the file since 2012. During the first year of 2012 he lives at address 4 and in the last two months he lives at address 5.
## Now I want to rearrange this dataset such that I have one address line per person per year. I would like to do this by making use of a reference month (say June). In other words, if a person lives in June at a particular address, I assign that address to him for the entire year. The dataset would then look as follows
## endresult<-data.frame(id = c("R007", "R007","R007","R007","R007",
## "R008", "R008", "R008", "R008","R008", "R009"),
## statenumber = c(1,1,1,1,1,2,2,2,2,3,4),
## year = c(2008, 2009, 2010, 2011, 2012,
## 2008,2009,2010,2011,2012, 2012))
##
## Does anyone know how to get to this endresult dataset? I appreciate any help. |0 |0 |7 |Albert |2023-06-02 11:52:36Z, 2023-06-02 12:32:05Z |
## | 76389028|38 |0 |r,ggplot2,tidyverse,x-axis,geom |2023-06-02 09:43:17Z |Lenman55 | 18991506|27 |G'day,
## I have a bunch of data which can be grouped. I want to display those data in a plot.
## Nothing special so far. But i have stumbled over the problem of displaying the data in a way, that signals their grouping. What i want is header for each section at the x-axis: so for Example the string "header 1" at the beginnen of the axis, "header 2" between A and B and so on.
## Here's the code i got so far
## # packages
## library (ggplot2)
##
## # data
## df = data.frame(
## x = factor(c("A", "B", "C", "B", "A", "C")),
## y = c(10, 15, 8, 12, 9, 10)
## )
##
## # Base plot
## p <- ggplot(df, aes(x,y)) + geom_point() +
## coord_flip()
##
## p
##
##
## There already exists a thread on a similar topic. Yet i would prefer a more "ggplot-y" way of doing so. Plus i would want the headers to be located directly at the x- axis.
## Any help would be awesome, thanks |0 |0 |4 |Lenman55 |2023-06-02 09:48:26Z |
## | 76388858|24 |0 |r,raster,terra |2023-06-02 09:25:44Z |89_Simple | 3227302|3,373 |library(terra)
## library(RColorBrewer)
##
## # sample polygon
## p <- system.file("ex/lux.shp", package="terra")
## p <- terra::vect(p)
##
## # sample raster
## r <- system.file("ex/elev.tif", package="terra")
## r <- terra::rast(r)
## r <- terra::crop(r, p , snap = "out", mask = T)
##
## terra::plot(r,
## col = brewer.pal(9, "pastel1"),
## cex.main = 2,
## smooth = T,
## legend = T,
## plg = list(title = "Score"),
## axes = TRUE,
## mar=c(3,3,3,6))
## plot(p, add = T)
##
## How do I change the size and orientation of the legend title 'Score'.
## I want to orient the title so that it is vertical and follows along the
## legend and also change the size of the legend title? |3 |36 |88 |bretauv |2023-06-02 09:55:33Z |
## | 76388339|39 |0 |r,dataûtable |2023-06-02 08:12:26Z |zhang | 20240835|125 |I have a data.table
## > dput(data.summary)
## structure(list(summary_type = c("0", "1", "2"), count = structure(list(
## 62234441L, 5119L, 821770L), .internal.selfref = <pointer: 0x557538f028c0>)), row.names = c(NA,
## -3L), class = c("data.table", "data.frame"), .internal.selfref = <pointer: 0x557538f028c0>)
##
## data.summary
## summary_type count
## 1: 0 62234441
## 2: 1 5119
## 3: 2 821770
##
## Its a data.table with 3 row, I want to sort it by count
## data.summary[order(count)]
##
## But after this, there are only have 1 row in data.table
## data.summary[order(count)]
## summary_type count
## 1: 0 62234441 |0 |0 |6 |Maël |2023-06-02 08:19:10Z |
## | 76388303|12 |1 |r,shiny |2023-06-02 08:08:34Z |PK X-FAB | 21765186|13 |I am displaying a selectInput in my shiny app and it is getting its choices from a dataframe (which is coming from a table in the database).
## this table (dataframe) has primary key and title in it. I want to show my users the title, but when they choose an option, I want to get the Id of the selected option to use in my code.
## I managed to get the selected "value" but I couldn't find how to get the "id" from the selected option.
## below is a simple example of how my code is looking with test objects:
##
##
## library(DT)
## library(tidyverse)
## library(shiny)
##
##
##
##
## test_id <- c(1, 2, 3)
## test_title <- c("a", "b", "c")
##
## test_df <- data.frame(test_id, test_title)
## test_df <- column_to_rownames(test_df, var = "test_id")
##
##
## ui <- fluidPage(
## selectInput("test", "test", choices = test_df),
##
## textOutput("selectedItem")
##
## )
##
## server <- function(input, output, session) {
##
## observeEvent(input$test, {
## output$selectedItem <- renderText(input$test)
## })
##
##
##
## }
##
## shinyApp(ui, server)
##
##
## does anyone have a solution to get the "id" of the selected option?
## I have tried this
## output$selectedItem <- renderText(input$test)
##
## but it is returning the value and not the Id |0 |0 |2 |NA |NA |
## | 76388156|30 |0 |r,ggplot2,survival-analysis |2023-06-02 07:48:22Z |MaxStudent | 12371008|73 |I have big data (n=500,000) on which I want to make survival analysis.
## I believe I have been able to get the regression as below.
## # your custom theme; unchanged
## custom_theme <- function(){
## theme_survminer() %+replace%
## theme(
## legend.background = element_rect(fill = "white", color = "black"),
## plot.title=element_text(hjust=0.4)
## )
## }
##
## # Create the formula
## filtered_data$event <- filtered_data$match_MACE
## filtered_data$event_followup <- filtered_data$time_diff_MACE_months
## formula1 <- as.formula("Surv(event_followup, event) ~ score")
##
## # Fit Cox proportional hazards model with reference group
## cox1 <- coxph(Surv(event_followup, event) ~ relevel(score, ref = "no AF proc + inactive"), data = filtered_data)
##
## cox1 <- coxph(formula1, data = filtered_data)
##
## coxP1 <- data.frame(summary(cox1)$coefficients)[,5]
##
## # Generate the confidence intervals and p-values with renamed scores
## coxConf1 <- data.frame(summary(cox1)$conf.int) %>%
## rownames_to_column() %>%
## mutate(p = coxP1,
## p2 = case_when(
## round(p, 3) > p ~ '=',
## round(p, 3) < p ~ '=',
## round(p, 3) == p ~ '='
## ),
## p3 = ifelse(round(p, 2) == 1, T, F),
## tag = paste0(rowname %>% gsub("(\\D)(\\d)", "\\1 \\2", .),
## ". HR ", exp.coef. %>% sprintf(fmt = "%.2f", .),
## " (95% CI ",
## lower..95 %>% sprintf(fmt = "%.2f", .),
## "-", upper..95 %>% sprintf(fmt = "%.2f", .),
## "), ",
## ifelse(p < 0.001, "P<0.001",
## ifelse(p >= 0.01 & p < 0.05, "P<0.05",
## ifelse(p >= 0.05, paste0("P", p2, sprintf(fmt = "%.2f", round(p, 2))),
## paste0("P", p2, sprintf(fmt = "%.3f", p))))))) %>%
## select(tag) %>%
## rename(new_variable_name = tag)
##
## coxConf1$new_variable_name <- gsub("score", "", coxConf1$new_variable_name)
##
## # Remove leading and trailing spaces
## coxConf1$new_variable_name <- trimws(coxConf1$new_variable_name)
##
## # Validate the result
## coxConf1
##
## But I am struggling to create the survival analysis plot where I will have
## the four groups, with no AF + inactive as the reference group
## and the reference group at the top of the legends, followed by no AF proc + active, then AF proc + inactive + AF proc + active.
## same order for the risk table.
## At the moment, the output in the legends are not the same than the output from the cox regression, and the colour in the table are not the same than the colour in the plot.
## # model to plot-
## ###----
## fit1 <- survfit(Surv(event_followup, event) ~ relevel(score, ref = "no AF proc + inactive"), data = filtered_data)
##
## # Reorder the legend labels based on the desired order
## legend_labels <- c("No AF proc + inactive", "No AF proc + active", "AF proc + inactive", "AF proc + active")
##
## risk_table_order <- c("no AF proc + inactive", "No AF proc + active", "AF proc + inactive", "AF proc + active")
##
## # Generate the plot with the reordered legends
## uni <- ggsurvplot(fit1,
## test.for.trend = TRUE,
## conf.int = FALSE,
## censor = FALSE,
## ggtheme = custom_theme(),
## legend = c(0.4, 0.25),
## font.legend = c(size = 12),
## legend.title = element_blank(),
## legend.labs = c("no AF proc + inactive ", unlist(coxConf1)),
## xlab = "Follow-up (months)",
## font.x = c(size = 12),
## risk.table = T,
## risk.table.side = "left",
## risk.table.y.text = F,
## risk.table.title = "Risk Table",
## risk.table.col = 1,
## risk.table.order = risk_table_order,
## ylab = "Free from MACE",
## font.y = c(size = 12),
## ylim = c(0, 1.05),
## xlim = c(0, N * 12),
## break.y.by = 0.2,
## break.x.by = 6,
## axes.offset = TRUE)
##
## # Add the additional labels from coxConf1 to the legend
## uni$plot <- uni$plot +
## scale_color_manual(labels = c(coxConf1$new_variable_name, "no AF proc + inactive"),
## values = c("green", "blue", "orange", "red"))
##
## # Update the color scale in the risk table
## uni$table <- uni$table +
## scale_fill_manual(labels = c(coxConf1$new_variable_name, "no AF proc + inactive"),
## values = c("green", "blue", "orange", "red"))
##
## # Print the plot
## print(uni)
##
## Here are the portion of my data:
## Participant.ID match_MACE time_diff_MACE_months score
## 1 1169548 0 72.0 AF proc + active
## 2 1098887 0 72.0 AF proc + active
## 3 1064538 0 72.0 AF proc + active
## 4 1089599 1 52.8 AF proc + active
## 5 1151144 0 72.0 AF proc + active
## 6 1195483 1 3.9 AF proc + active
## 7 1188227 0 72.0 AF proc + inactive
## 8 1009863 0 72.0 AF proc + inactive
## 9 1081307 0 72.0 AF proc + inactive
## 10 1154860 1 24.1 AF proc + inactive
## 11 1032585 0 72.0 AF proc + inactive
## 12 1150639 1 41.6 AF proc + inactive
## 13 1162966 1 10.0 AF proc + inactive
## 14 1036817 1 38.9 AF proc + inactive
## 15 1110369 1 37.1 AF proc + inactive
## 16 1224730 0 72.0 AF proc + inactive
## 17 1081741 1 1.6 no AF proc + active
## 18 1151938 0 72.0 no AF proc + active
## 19 1094579 0 72.0 no AF proc + active
## 20 1099578 0 72.0 no AF proc + active
## 21 1103712 1 18.7 no AF proc + active
## 22 1165594 1 11.7 no AF proc + active
## 23 1112693 0 72.0 no AF proc + active
## 24 1212915 0 72.0 no AF proc + active
## 25 1098520 0 72.0 no AF proc + active
## 26 1199539 0 72.0 no AF proc + active
## 27 1032596 0 72.0 no AF proc + active
## 28 1081820 0 72.0 no AF proc + active
## 29 1182095 0 72.0 no AF proc + active
## 30 1080136 0 72.0 no AF proc + active
## 31 1168550 1 18.0 no AF proc + active
## 32 1112211 1 5.9 no AF proc + active
## 33 1099400 0 72.0 no AF proc + active
## 34 1150138 0 72.0 no AF proc + active
## 35 1169589 0 72.0 no AF proc + active
## 36 1130930 0 72.0 no AF proc + active
## 37 1023969 0 72.0 no AF proc + active
## 38 1148484 0 72.0 no AF proc + active
## 39 1122955 0 72.0 no AF proc + active
## 40 1156431 0 72.0 no AF proc + active
## 41 1019486 1 11.7 no AF proc + active
## 42 1071958 0 72.0 no AF proc + active
## 43 1108366 0 72.0 no AF proc + active
## 44 1204170 0 72.0 no AF proc + active
## 45 1004705 1 28.4 no AF proc + active
## 46 1118320 0 72.0 no AF proc + active
## 47 1029092 0 72.0 no AF proc + active
## 48 1124181 0 72.0 no AF proc + active
## 49 1215341 0 72.0 no AF proc + active
## 50 1198926 0 72.0 no AF proc + active
## 51 1011130 0 72.0 no AF proc + active
## 52 1025778 0 72.0 no AF proc + active
## 53 1158451 1 1.2 no AF proc + active
## 54 1138223 0 72.0 no AF proc + active
## 55 1060529 1 49.9 no AF proc + active
## 56 1152697 1 66.6 no AF proc + active
## 57 1053224 0 72.0 no AF proc + active
## 58 1049995 0 72.0 no AF proc + active
## 59 1010349 0 72.0 no AF proc + active
## 60 1038676 0 72.0 no AF proc + active
## 61 1113027 0 72.0 no AF proc + active
## 62 1173430 0 72.0 no AF proc + active
## 63 1197104 0 72.0 no AF proc + active
## 64 1084089 0 72.0 no AF proc + active
## 65 1056039 0 72.0 no AF proc + active
## 66 1016849 1 11.4 no AF proc + active
## 67 1085906 0 72.0 no AF proc + active
## 68 1069267 0 72.0 no AF proc + active
## 69 1088756 0 72.0 no AF proc + active
## 70 1169474 0 72.0 no AF proc + active
## 71 1212484 0 72.0 no AF proc + active
## 72 1160969 1 2.1 no AF proc + active
## 73 1110720 0 72.0 no AF proc + active
## 74 1159911 0 72.0 no AF proc + active
## 75 1193184 1 2.6 no AF proc + active
## 76 1214083 1 34.5 no AF proc + active
## 77 1039295 1 49.8 no AF proc + active
## 78 1220844 0 72.0 no AF proc + active
## 79 1103750 1 5.9 no AF proc + active
## 80 1155089 0 72.0 no AF proc + active
## 81 1030976 0 72.0 no AF proc + active
## 82 1015095 1 68.9 no AF proc + active
## 83 1209639 0 72.0 no AF proc + active
## 84 1221353 1 3.0 no AF proc + active
## 85 1115244 1 20.4 no AF proc + active
## 86 1102645 1 9.3 no AF proc + active
## 87 1141749 0 72.0 no AF proc + active
## 88 1167528 0 72.0 no AF proc + active
## 89 1136411 1 24.0 no AF proc + active
## 90 1076923 1 3.6 no AF proc + active
## 91 1220060 1 9.3 no AF proc + active
## 92 1217955 1 13.5 no AF proc + active
## 93 1024327 1 16.8 no AF proc + active
## 94 1161868 1 30.6 no AF proc + active
## 95 1164625 1 55.5 no AF proc + active
## 96 1044532 0 72.0 no AF proc + active
## 97 1051373 0 72.0 no AF proc + active
## 98 1010462 0 72.0 no AF proc + active
## 99 1064218 1 16.4 no AF proc + active
## 100 1164728 1 20.9 no AF proc + active
## 101 1193872 0 72.0 no AF proc + active
## 102 1065997 1 27.0 no AF proc + active
## 103 1018197 0 72.0 no AF proc + active
## 104 1078482 0 72.0 no AF proc + active
## 105 1140958 0 72.0 no AF proc + active
## 106 1212470 0 72.0 no AF proc + active
## 107 1045422 0 72.0 no AF proc + active
## 108 1159452 0 72.0 no AF proc + active
## 109 1063314 0 72.0 no AF proc + active
## 110 1081265 1 40.8 no AF proc + active
## 111 1177440 0 72.0 no AF proc + active
## 112 1156121 1 8.0 no AF proc + active
## 113 1148416 0 72.0 no AF proc + active
## 114 1170157 0 72.0 no AF proc + inactive
## 115 1169467 0 72.0 no AF proc + inactive
## 116 1039020 0 72.0 no AF proc + inactive
## 117 1116305 1 68.5 no AF proc + inactive
## 118 1036116 0 72.0 no AF proc + inactive
## 119 1000414 0 72.0 no AF proc + inactive
## 120 1101961 0 72.0 no AF proc + inactive
## 121 1128884 0 72.0 no AF proc + inactive
## 122 1079375 0 72.0 no AF proc + inactive
## 123 1083593 1 3.4 no AF proc + inactive
## 124 1017198 0 72.0 no AF proc + inactive
## 125 1184712 0 72.0 no AF proc + inactive
## 126 1006604 1 59.4 no AF proc + inactive
## 127 1202283 1 22.6 no AF proc + inactive
## 128 1197594 0 72.0 no AF proc + inactive
## 129 1145799 0 72.0 no AF proc + inactive
## 130 1072222 0 72.0 no AF proc + inactive
## 131 1168984 1 14.0 no AF proc + inactive
## 132 1134637 1 10.1 no AF proc + inactive
## 133 1119639 1 25.4 no AF proc + inactive
## 134 1082634 1 69.0 no AF proc + inactive
## 135 1174344 0 72.0 no AF proc + inactive
## 136 1020621 1 7.8 no AF proc + inactive
## 137 1024878 0 72.0 no AF proc + inactive
## 138 1068864 0 72.0 no AF proc + inactive
## 139 1115700 1 45.6 no AF proc + inactive
## 140 1075462 0 72.0 no AF proc + inactive
## 141 1173856 0 72.0 no AF proc + inactive
## 142 1162017 0 72.0 no AF proc + inactive
## 143 1131283 0 72.0 no AF proc + inactive
## 144 1204985 0 72.0 no AF proc + inactive
## 145 1024645 0 72.0 no AF proc + inactive
## 146 1221414 1 1.3 no AF proc + inactive
## 147 1207542 1 30.4 no AF proc + inactive
## 148 1002733 0 72.0 no AF proc + inactive
## 149 1158465 0 72.0 no AF proc + inactive
## 150 1214236 0 72.0 no AF proc + inactive
## 151 1099718 1 6.8 no AF proc + inactive
## 152 1032299 1 62.3 no AF proc + inactive
## 153 1062237 1 25.1 no AF proc + inactive
## 154 1180813 1 7.7 no AF proc + inactive
## 155 1112010 0 72.0 no AF proc + inactive
## 156 1098256 0 72.0 no AF proc + inactive
## 157 1223290 0 72.0 no AF proc + inactive
## 158 1140145 0 72.0 no AF proc + inactive
## 159 1082226 0 72.0 no AF proc + inactive
## 160 1092734 1 39.6 no AF proc + inactive
## 161 1057267 1 18.8 no AF proc + inactive
## 162 1199418 0 72.0 no AF proc + inactive
## 163 1143094 0 72.0 no AF proc + inactive
## 164 1192641 1 56.8 no AF proc + inactive
## 165 1215091 1 8.0 no AF proc + inactive
## 166 1201176 1 37.9 no AF proc + inactive
## 167 1217497 0 72.0 no AF proc + inactive
## 168 1026150 1 26.1 no AF proc + inactive
## 169 1092164 0 72.0 no AF proc + inactive
## 170 1222354 0 72.0 no AF proc + inactive
## 171 1200144 0 72.0 no AF proc + inactive
## 172 1154902 0 72.0 no AF proc + inactive
## 173 1163901 0 72.0 no AF proc + inactive
## 174 1085890 1 28.1 no AF proc + inactive
## 175 1184176 0 72.0 no AF proc + inactive
## 176 1068718 0 72.0 no AF proc + inactive
## 177 1221825 0 72.0 no AF proc + inactive
## 178 1124417 1 18.5 no AF proc + inactive
## 179 1080766 0 72.0 no AF proc + inactive
## 180 1197133 0 72.0 no AF proc + inactive
## 181 1152678 0 72.0 no AF proc + inactive
## 182 1049669 1 26.4 no AF proc + inactive
## 183 1046175 1 13.0 no AF proc + inactive
## 184 1094515 1 59.9 no AF proc + inactive
## 185 1066319 0 72.0 no AF proc + inactive
## 186 1167031 1 4.0 no AF proc + inactive
## 187 1099812 0 72.0 no AF proc + inactive
## 188 1118674 0 72.0 no AF proc + inactive
## 189 1216340 1 24.3 no AF proc + inactive
## 190 1060200 1 24.0 no AF proc + inactive
## 191 1055191 0 72.0 no AF proc + inactive
## 192 1093435 0 72.0 no AF proc + inactive
## 193 1164844 1 67.0 no AF proc + inactive
## 194 1220734 1 12.0 no AF proc + inactive
## 195 1171966 0 72.0 no AF proc + inactive
## 196 1024115 1 1.7 no AF proc + inactive
## 197 1143227 1 11.3 no AF proc + inactive
## 198 1021282 1 1.5 no AF proc + inactive
## 199 1101307 1 37.9 no AF proc + inactive
## 200 1191745 1 64.3 no AF proc + inactive |0 |0 |5 |zx8754 |2023-06-02 08:52:43Z |
## | 76388032|57 |-2 |r,dataframe |2023-06-02 07:28:23Z |fatemeh | 19819041|1 |I have a data frame that shows :
## "dataframe1" :
## rows : index
## columns:
## column 1: a gene name
## column 2: mutation
## column 3: a sample
## visually like this:
##
##
## gene name
## mutation
## sample
## 1
## gene1
## a
## sample1
## 2
## gene2
## b
## sample2
## 3
## gene4
## a
## sample3
## 4
## gene4
## c
## sample2
##
## What I would like to achieve is below structure:
## "dataframe2":
## rows: a gene name
## columns:
## sample 1: if a mutation occurred for sample1,then put the number1; Otherwise put the number zero.
## sample 2: if a mutation occurred for sample2,then put the number1; Otherwise put the number zero.
## sample 3: if a mutation occurred for sample3,then put the number1; Otherwise put the number zero. . . .
## visually like this:
##
##
## sample1
## sample2
## sample3
## gene1
## 1
## 0
## 0
## gene2
## 0
## 1
## 0
## gene4
## 0
## 1
## 1
##
##
##
##
##
## How can I convert "dataframe1" to "dataframe2" ? could you guide me please ?
## I'm quit new in R . could you guide me please? |0 |0 |0 |fatemeh |2023-06-02 19:08:25Z |
## | 76387964|25 |-2 |r,glm,h2o,gbm |2023-06-02 07:18:41Z |Den | 22005307|1 |Closed. This question needs debugging details. It is not currently accepting answers.
##
##
##
##
##
##
##
##
##
##
##
## Edit the question to include desired behavior, a specific problem or error, and the shortest code necessary to reproduce the problem. This will help others answer the question.
##
##
## Closed 7 hours ago.
##
##
##
##
##
##
##
##
## Improve this question
##
##
##
##
## my issue is about calculating the mean residual deviance in H2O. The function I've specified to calculate it is as follows:
## The function used to calculate mean residual deviance
## The value I received by using it is 0.2766575. However, after using h2o.performance(model_name) the value shown by it is substantially different:
## h2o.performance mean residual deviance
## As you can see, the mean deviance shown by h2o is almost twice as big as the deviance shown by my function. Does anyone by any chance know by what exact formula is the MRD shown in h2o.performance calculated? I tried to find the answer to what's causing the difference on github, but didn't manage.
## Thank you in advance.
## In general, I tried to find out the formula used by h2o.performance and what's taken into account in the shown results of that function. |0 |0 |0 |NA |NA |
## | 76387937|31 |-1 |r,dplyr,data-wrangling,grepl |2023-06-02 07:13:16Z |jo_ | 14385099|521 |I have a dataframe that looks something like this:
## example <- data.frame(
## date = c("6/1/22", "6/2/22", "6/3/22",
## "6/1/22", "6/2/22", "6/3/22",
## "6/1/22", "6/2/22", "6/3/22",
## "6/1/22", "6/2/22", "6/3/22"),
## sub = c(1101, 1101, 1101,
## 1102, 1102, 1102,
## 2101, 2101, 2101,
## 2102, 2102, 2102),
## express_p = c("eg1", "eg2", "eg3", "eg4",
## "eg5", "eg6", "eg7", "eg8",
## "eg9", "eg10", "eg11", "eg12"),
## p_express = c("a", "b", "c", "d",
## "e", "f", "g", "h",
## "i", "j", "k", "l")
## )
##
## I want to regroup the data by date, pair the sub and create new columns. This is how the end result should look like:
## example_clean <- data.frame(
## date = c("6/1/22", "6/2/22", "6/3/22", "6/1/22", "6/2/22", "6/3/22"),
## subA = c(1101, 1101, 1101, 1102, 1102, 1102),
## subB = c(2101, 2101, 2101, 2102, 2102, 2102),
## express_p_A = c("eg1", "eg2", "eg3", "eg7", "eg8", "eg9"),
## p_express_B = c("d", "e", "f", "j", "k", "l"),
## express_p_B = c("eg4", "eg5", "eg6", "eg10", "eg11", "eg12"),
## p_express_A = c("a", "b", "c", "g", "h", "i")
## )
##
## Essentially, I am pairing up all the numbers in sub that have the same 3 last digits to be in the saw row. Each pair is categorized by date. Then, the order of the columns should also be reshuffled (and renamed) such that express_p for one subject is right beside p_express of its corresponding partner (e.g. 1101's express_p is to the left of 2101's p_express).
## I've asked this question previously with a simpler case and received a great answer. Does anyone know an elegant way to solve this updated problem?
## Thank you! |0 |2 |9 |NA |NA |
## | 76387472|56 |0 |r |2023-06-02 05:57:47Z |HJ WHY | 19404669|19 |My goal is to change value 99999 with the value adjacent to it unless it's 99999 again.
## I took the advice from here before, now I am having a new problem.
## MRE:
## 'as' is a dataframe with 9 different cohort datasets; 10030 obs of 7060 variables. I am mainly (as of now) dealing with as$AS1_WEIGHT ... as$AS9_WEIGHT
## > as %>%
## + select(starts_with("AS") & ends_with("_WEIGHT")) %>% head() %>% dput()
##
## structure(list(AS1_WEIGHT = c(72, 59, 50, 55.2, 82.1, 50.4),
## AS2_WEIGHT = c(74.8, NA, NA, 54.8, 84.5, 52.5), AS3_WEIGHT = c(75.2,
## NA, NA, 55.9, 81.7, 54.6), AS4_WEIGHT = c(75, NA, NA, 55.1,
## 80.6, NA), AS5_WEIGHT = c(75.4, NA, NA, 58.8, 89.5, NA),
## AS6_WEIGHT = c(77.3, NA, NA, NA, NA, NA), AS7_WEIGHT = c(70.7,
## NA, NA, 56, NA, NA), AS8_WEIGHT = c(73.8, NA, NA, 55.5, NA,
## NA), AS9_WEIGHT = c(74.5, NA, NA, 54.8, NA, 52)), row.names = c(NA,
## -6L), class = c("tbl_df", "tbl", "data.frame"))
##
##
##
## as %<>%
## mutate(row = row_number()) %>%
## tidyr::pivot_longer(starts_with("AS") & ends_with("_WEIGHT")) %>%
## mutate(value = if_else(value == '99999', lead(value), value), .by = row) %>%
## pivot_wider(names_from = name, values_from = value)
##
## returns:
## Error in tidyr::pivot_longer():
## ! Names must be unique.
## ✖ These names are duplicated:
## "name" at locations 7049 and 7053.
## "value" at locations 7050 and 7054.
## ℹ Use argument names_repair to specify repair strategy.
## Run rlang::last_trace() to see where the error occurred.
## So I ran this code to see which columns are duplicated:
## > dup_col <- duplicated(base::as.list(as))
## colnames(as[dup_col])
##
## character(0)
##
## I ran another code to see if I am referring to the right columns
## > as %>%
## select(starts_with("AS") & ends_with("_WEIGHT")) %>%
## colnames()
##
## [1] "AS1_WEIGHT" "AS2_WEIGHT" "AS3_WEIGHT" "AS4_WEIGHT" "AS5_WEIGHT" "AS6_WEIGHT" "AS7_WEIGHT" "AS8_WEIGHT"
## [9] "AS9_WEIGHT"
##
##
## Thank you in advance! |0 |0 |7 |HJ WHY |2023-06-02 06:09:21Z, 2023-06-02 07:08:58Z |
## | 76387441|39 |0 |r,stata,nnet,marginal-effects |2023-06-02 05:49:57Z |Nader Mehri | 9319259|491 |I have fit a multinomial logistic model as shown below. Using the margins command, I would like to obtain the predicted values for my outcome variable while setting the predictors to some specific values. In other words, I would like to replicate the Stata code below. Chatgpt gave me code that I pasted below as well but it returns an error of "Error in find_terms_in_model.default(model, variables = variables): Some values in 'variables' are not in the model terms."
## my multinomial logist code
## library(nnet)
## model <- multinom(obesity_E ~ age_100 + I(age_100^2) + obesity + age_100:obesity +
## covid:race + age_100:race + education + education:race +
## rabplace_5, data = female_98_2020, maxit=1000)
##
## Code suggested by ChatGPT
## margins_model <- margins(model, variables = "obesity:race:rabplace_5",
## at = list(age_100 = seq(0, 25, 1)), atMethod = "mean",
## method = "probs", force = TRUE, noSe = TRUE,
## save = "tran_point_F", replace = TRUE)
##
## My Stata code
## margins , at (age_100=(0 (1) 25) obesity=(1 (1) 4) race=(0 (1) 3) rabplace_5=(1 (1) 5)) atmeans force nose saving(tran_point_F, replace ) |0 |4 |20 |Nick Cox |2023-06-02 06:38:52Z |
## | 76387313|17 |0 |r,machine-learning,mixture-model,gmm |2023-06-02 05:18:31Z |Omarov Alen | 14292341|25 |I'm working on implementing a Gaussian Mixture Model (GMM) for three-way data (i.e., a set of matrices) in R. The GMM is being estimated using the Expectation-Maximization (EM) algorithm. However, I'm encountering an issue during the Expectation (E) step of the algorithm.
## In the E step, I'm calculating the posterior probabilities (responsibilities) for each data matrix belonging to each component of the mixture model. These are calculated as the product of the matrix-normal density of the data matrix given the parameters of the component and the mixing weight of the component, divided by the sum of these products over all components.
## The issue is that when I sum the posterior probabilities over all components for each data matrix (using colSums(comp.post)), I'm not getting a result of 1 for all data matrices, as I would expect.
## Here's the relevant portion of the code:
## #' Expectation Step of the EM Algorithm for Gaussian Mixture Model
## #' with three-way data (a 3D array where each matrix is a data point).
## #' Calculate the posterior probabilities (soft labels) that each component
## #' has to each data matrix using matrix-normal distribution.
## #'
## #' @param Y Three-dimensional array containing the data, where each matrix represents a data point.
## #' @param M Array containing the mean matrix of each component, where each matrix represents a component.
## #' @param Phi List containing the row covariance matrix of each component.
## #' @param Omega List containing the column covariance matrix of each component.
## #' @param alpha Vector containing the mixing weights of each component.
## #' @return Named list containing the loglik and posterior.df for each data matrix
##
## e_step <- function(Y, M, Phi, Omega, alpha) {
##
## # Number of components in the mixture
## n_clusters <- length(alpha)
##
## # Number of matrices in the dataset
## n_matrices <- dim(Y)[3]
##
## # Calculate the log of the matrix-normal density for each matrix in Y for each component
## # and add the log of the mixing weight of the component
## log_comp.prod <- array(dim = c(n_clusters, n_matrices))
## for (i in 1:n_clusters) {
## for (j in 1:n_matrices) {
## log_comp.prod[i, j] <- matrixNormal::dmatnorm(Y[,,j], M[,,i], Phi[,,i], Omega[,,i]) + log(alpha[i])
## }
## }
##
## # Subtract the max to avoid overflow when exponentiating
## log_comp.prod <- log_comp.prod - max(log_comp.prod)
##
## # Calculate the log of the total density for each matrix in Y
## log_sum.of.comps <- log(colSums(alpha * exp(log_comp.prod)))
##
## # Subtract the log of the total density from the log of the density for each component
## # to get the log of the posterior probabilities (responsibilities)
## log_comp.post <- log_comp.prod - log_sum.of.comps
##
## # Exponentiate to get back to the original scale
## comp.post <- exp(log_comp.post)
##
## # Calculate the log-likelihood as the sum of the log of the total densities
## loglik <- sum(log_sum.of.comps)
##
## return(list("loglik" = loglik, "posterior.df" = comp.post))
## }
##
## Here what I get:
## colSums(comp.post)
## [1] 3.739240e+01 6.498986e-02 9.844537e+00 3.475485e+00 9.047767e+03 2.196267e-01
## [7] 2.079427e+01 1.165800e+02 1.405744e-01 1.353819e+01 2.433372e+00 2.051357e+00
## [13] 4.472772e+00 3.597247e-02 3.210629e-01 8.761967e-01 4.396359e+01 3.265571e+02
## [19] 1.247715e+02 3.616610e-02 4.361902e-01 2.035783e-02 5.585075e+01 3.328536e+00
## [25] 5.880054e+00 2.166311e-01 5.388875e+02 3.931191e+01 1.642435e+00 5.129309e-01
##
## I've tried different ways to compute the matrix-normal density, including using the matrixNormal::dmatnorm() function with log = FALSE and log = TRUE, and writing a custom function to compute the density. However, none of these approaches have resolved the issue.
## I use this formula for my calculations:
##
## I'm not sure what's causing this issue or how to fix it. Any insights or suggestions would be greatly appreciated. Thank you! |0 |0 |3 |Omarov Alen |2023-06-02 23:44:58Z |
## | 76387295|16 |0 |r,glmnet |2023-06-02 05:13:29Z |lorenz | 22004825|1 |I am trying to install the package glmnet4.1.7 in R 4.2.3
## Server details: RHEL 7.9
## gcc 7.1
## Server has No internet. I downloaded the glmnet tarball and tried to install using the command R CMD install glmnet
## When installing glmnet it fails with below error (attached):
## any suggestions on how to solve the above error? like which lgfortran file must exist in which path? etc..
## What I tried so far:
## I noticed that there is a libgfortran.so file in /lib64.. I added this in LD_LIBRARY_PATH environment variable but same issue is there, |0 |0 |0 |NA |NA |
## | 76387189|46 |1 |r,installation |2023-06-02 04:45:50Z |Vincent Laufer | 2727150|705 |I am attempting to install an R package named 'infercna', the github repository to which is linked here.
## The install process attempts to load another package named 'scalop', which is linked here.
## Specifically, this command:
## devtools::install_github("jlaffy/infercna")
##
## returns
## Downloading GitHub repo jlaffy/infercna@HEAD
## ── R CMD build ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
## ✔ checking for file '/private/var/folders/hj/1wvjfb692c3gswybcg8xdcwm0000gn/T/RtmpqQIEYL/remotes7d75586a9ac5/jlaffy-infercna-98a8db8/DESCRIPTION' (343ms)
## ─ preparing 'infercna':
## ✔ checking DESCRIPTION meta-information ...
## ─ checking for LF line-endings in source and make files and shell scripts
## ─ checking for empty or unneeded directories
## NB: this package now depends on R (>= 3.5.0)
## WARNING: Added dependency on R >= 3.5.0 because serialized objects in
## serialize/load version 3 cannot be read in older versions of R.
## File(s) containing such objects:
## 'infercna/data-raw/genes.rda'
## ─ building 'infercna_1.0.0.tar.gz'
##
## * installing *source* package 'infercna' ...
## ** using staged installation
## ** R
## ** data
## *** moving datasets to lazyload DB
## ** byte-compile and prepare package for lazy loading
## Error in loadNamespace(j <- i[[1L]], c(lib.loc, .libPaths()), versionCheck = vI[[j]]) :
## there is no package called 'scalop'
## Calls: <Anonymous> ... loadNamespace -> withRestarts -> withOneRestart -> doWithOneRestart
## Execution halted
## ERROR: lazy loading failed for package 'infercna'
## * removing '/Library/Frameworks/R.framework/Versions/4.3-x86_64/Resources/library/infercna'
##
## As such, I backtracked and attempted to install scalop, like so:
## remotes::install_github("jlaffy/scalop")
##
## This is where things start to really get hairy. To install, scalop requires 95 dependencies. Upon successful installation of all 95, the installation for scalop will eventually still fail, like so:
## checking for file '/private/var/folders/hj/1wvjfb692c3gswybcg8xdcwm0000gn/T/RtmpqQIEYL/remotes7d757fe15404/jlaffy-scalop-021999d/DESCRIPTION' ...
## ─ preparing 'scalop': (385ms)
## ✔ checking DESCRIPTION meta-information ...
## ─ cleaning src
## ─ checking for LF line-endings in source and make files and shell scripts
## ─ checking for empty or unneeded directories
## ─ building 'scalop_1.1.0.tar.gz'
##
## * installing *source* package 'scalop' ...
## ** using staged installation
## ** libs
## using C compiler: 'Apple clang version 11.0.3 (clang-1103.0.32.62)'
## using SDK: 'MacOSX10.15.sdk'
## clang -arch x86_64 -I"/Library/Frameworks/R.framework/Resources/include" -DNDEBUG -I'/Library/Frameworks/R.framework/Versions/4.3-x86_64/Resources/library/Rcpp/include' -I/opt/R/x86_64/include -fPIC -falign-functions=64 -Wall -g -O2 -c init.c -o init.o
## clang -arch x86_64 -I"/Library/Frameworks/R.framework/Resources/include" -DNDEBUG -I'/Library/Frameworks/R.framework/Versions/4.3-x86_64/Resources/library/Rcpp/include' -I/opt/R/x86_64/include -fPIC -falign-functions=64 -Wall -g -O2 -c nd.c -o nd.o
## nd.c:24:10: fatal error: 'S.h' file not found
## #include "S.h"
## ^~~~~
## 1 error generated.
## make: *** [nd.o] Error 1
## ERROR: compilation failed for package 'scalop'
## * removing '/Library/Frameworks/R.framework/Versions/4.3-x86_64/Resources/library/scalop'
##
## I am writing to ask if anyone knows enough about this output to know what to do to fix the "fatal error, 'S.h' file not found" error, which ultimately kills the download.
## Several people have reached out to the author, as per the issues posted on scalop; specifically issues 4 and 5, but no reply. Additionally, posting the error message into google does not return useful hits, so far as I can see.
## Finally, I am happy to provide any and all necessary info; e.g. sessionInfo(), R version (4.3) Mac OS (11.7) etc.
## Help me Stack Overflow-Kenobi, you're my only hope. |0 |10 |26 |NA |NA |
## | 76387115|38 |0 |r,ggplot2,bar-chart,group |2023-06-02 04:29:56Z |Jess | 2100907|186 |I'm trying to make a side-by-side bar plot, by gender, with age category on the x axis and completion time on the y axis. One of my age categories has no males, so there is no data and there should be no bar there. The dodge_position2(preserve = "single") option won't work because in my example I need to specify a y aesthetic (I don't just want a frequency of all the datapoints in that age category).
## The instructions here show exactly my problem (with the double-wide bar), but I cannot find any information on how to modify it to accept an x and y aesthetic. I suspect I need to manually create these in a much more convoluted way. (Oh, also stacked bar graph won't work for my purpose.)
## My code attempts:
## The version with a double-wide bar for an age category missing males:
## p<-ggplot(d, aes(x=AgeGroup, y=Time, fill=Gender)) +
## geom_bar(stat="identity", position=position_dodge(), width = 0.7) +
## scale_fill_manual(values=c('#2A800A','#9CDC46'))
## p
##
## This version just makes almost all my bars disappear, I have no idea why:
## p<-ggplot(d, aes(x=AgeGroup, y=Time, fill=Gender)) +
## geom_bar(stat="identity", position=position_dodge(preserve="single"), width = 0.7) +
## scale_fill_manual(values=c('#2A800A','#9CDC46'))
## p
##
## This gives me error stat_count() must only have an x or y aesthetic.
## p<-ggplot(d, aes(x=AgeGroup, y=Time, fill=Gender)) +
## geom_bar(position = position_dodge2(preserve = "single")) +
## scale_fill_manual(values=c('#2A800A','#9CDC46'))
## p
##
## I'm not familiar enough with ggplot2 to create this from scratch. I would appreciate any suggestions! |0 |3 |13 |NA |NA |
## | 76387096|33 |1 |r,time-series,glm,arima,mle |2023-06-02 04:23:29Z |Leonardo19 | 20873851|83 |I am confused about the difference in the fitting results of the Arima() function and glm() function.
## I want to fit an AR(1) model with an exogeneous variable. Here is the equation:
## $$
## x_{t} = \alpha_{0} + \alpha_{1}x_{t-1} + \beta_{1}z_{t} + \epsilon_{t}
## $$
##
## Now I estimate this model using the Arima() function and glm() function and compare the results, but the results turned out to be quite different!
## Here is the sample data. x denotes the time-series variable, and z denotes the exogeneous variable, as shown in the equation above.
## library(forecast)
## library(tidyverse)
## data("Nile")
## df <-
## Nile %>%
## as_tibble() %>%
## mutate(x = as.numeric(x)) %>%
## mutate(z = rnorm(100))
##
## Then fit the model using the Arima() and glm() and compare the results.
## fit_arima <- Arima(df$x, order = c(1, 0, 0), include.mean = TRUE, xreg = df$z)
## tibble(Parameters = c("x lag", "intercept", "z"),
## Coefficients = coef(fit_arima),
## Standard_Errors = sqrt(diag(vcov(fit_arima))))
## fit_glm <- glm(df$x ~ lag(df$x) + df$z)
## tibble(Parameters = c("intercept", "x lag", "z"),
## Coefficients = coef(fit_glm),
## Standard_Errors = summary(fit_glm)$coefficients[, "Std. Error"])
##
## The results are displayed as follows.
## Arima() function:
## # A tibble: 3 × 3
## Parameters Coefficients Standard_Errors
## <chr> <dbl> <dbl>
## 1 x lag 0.510 0.0868
## 2 intercept 920. 29.4
## 3 z 5.02 12.1
##
## glm() function:
## # A tibble: 3 × 3
## Parameters Coefficients Standard_Errors
## <chr> <dbl> <dbl>
## 1 intercept 444. 83.4
## 2 x lag 0.516 0.0896
## 3 z 8.95 13.9
##
## The estimated coefficient and standard error of x lag are quite close, but the values of other two variables are very different. I find this puzzling because both the Arima() and glm() function use the maximum likelihood estimator. Could you please explain why this difference happens and how can I fix this? |0 |0 |5 |NA |NA |
## | 76386943|25 |0 |r,keras,neural-network,gradient |2023-06-02 03:25:23Z |Lily Xu | 21847814|1 |I try to build a neural network which use log-likelihood function as an out put using R language. The output is:
## function_gamma_true<-function(args){
## c(current_prob,current_zij,current_ki,current_zij0,predict_probabilities,
## log_probabilities,predict_alpha,lgamma_alpha,predict_beta,log_beta,
## predict_shape,log_shape,predict_scale,log_scale,log_y_scale,
## predict_alpha_splicing,lgamma_alpha_splicing,predict_beta_splicing,
## log_beta_splicing,y_observations,log_y_observations) %<-% args
## log_probabilities0=log_probabilities[,1:no_densities]
## loglik<-(
## k_sum(current_zij[,1:no_densities,drop=FALSE]*
## (log_probabilities0+predict_alpha*log_beta-lgamma_alpha+predict_alpha*log_y_observations
## -log_y_observations-predict_beta*y_observations+current_zij[,no_densities+1]*
## k_log(1-predict_beta_splicing^predict_alpha_splicing/exp(lgamma_alpha_splicing)*
## y_observations^(predict_alpha_splicing-1)*k_exp(-predict_beta_splicing*y_observations))),
## axis=2,keepdims=TRUE)+current_zij[,no_densities+1,drop=FALSE]*
## (log_probabilities[,no_densities+1,drop=FALSE]+log_shape-log_scale-predict_scale*(log_y_scale-log_scale)-(log_y_scale-log_scale)+
## predict_alpha_splicing*log_beta_splicing-lgamma_alpha_splicing+predict_alpha_splicing*
## log_y_observations-log_y_observations-predict_beta_splicing*y_observations)+
## current_ki*k_sum(current_zij0*log_probabilities,axis=2,keepdims=TRUE)
## )
## }
## loglik<-layer_lambda(list(current_prob,current_zij,current_ki,current_zij0,predict_probabilities,
## log_probabilities,predict_alpha,lgamma_alpha,predict_beta,log_beta,
## predict_shape,log_shape,predict_scale,log_scale,log_y_scale,
## predict_alpha_splicing,lgamma_alpha_splicing,predict_beta_splicing,
## log_beta_splicing,y_observations,log_y_observations),
## function_gamma_true)
##
##
## I use 0 as y_ture. So that I can get a training result make the likelihood function reach maximum. The loss function is:
## max_loss<-custom_metric("max",function(y_true,y_pred){k_mean(y_true-y_pred)})
##
## The error message is:
## Error in py_call_impl(callable, dots$args, dots$keywords) :
## tensorflow.python.framework.errors_impl.InvalidArgumentError: Graph execution error:
## <... omitted ...>.train_step(data)
## File "D:\Software\R\renv\python\R-RETI~1\lib\site-packages\keras\engine\training.py", line 1027, in train_step
## self.optimizer.minimize(loss, self.trainable_variables, tape=tape)
## File "D:\Software\R\renv\python\R-RETI~1\lib\site-packages\keras\optimizers\optimizer_experimental\optimizer.py", line 526, in minimize
## grads_and_vars = self.compute_gradients(loss, var_list, tape)
## File "D:\Software\R\renv\python\R-RETI~1\lib\site-packages\keras\optimizers\optimizer_experimental\optimizer.py", line 259, in compute_gradients
## grads = tape.gradient(loss, var_list)
## Node: 'gradient_tape/model_15/lambda_7/Add_2/BroadcastGradientArgs'
## Incompatible shapes: [1024,3] vs. [1024,1024]
## [[{{node gradient_tape/model_15/lambda_7/Add_2/BroadcastGradientArgs}}]] [Op:__inference_train_function_4724]
## See reticulate::py_last_error() for details
## How can I debug this error?
## Any help here is truly needed and is well appreciated. |0 |0 |0 |neilfws |2023-06-02 03:39:43Z |
## | 76386904|33 |0 |r,merge,average,wide-format-data |2023-06-02 03:10:39Z |oriole1231 | 22001942|1 |I am working on a dataset where I collected moths from trees in several fields. 3 of each tree type was sampled. In some cases, some fields were sampled on the same dates, but not always.
## At each tree, samples were taken from the "top," "middle," and "bottom" sections. Because of this, there are 3 rows of values for each tree.
## I would like to somehow merge those 3 rows into one, so that we can calculate what the average number of moths per tree was. I would like to do this for all combined species as well as for each separate species.
## I am also curious to know how I would be able to calculate the average number of moths per each type of tree, and per each sample location (top, middle, bottom).
## I am sure that this is probably an easy fix, but R is pretty new to me. I have included a sample of the data below. I condensed it so that it wouldn't be too large. I hope that I used the dput() function correctly, and that my questions make sense. Thank you all in advance!
## treedata01 <- structure(list(Date = c("4/14/2023", "4/14/2023", "4/14/2023",
##
## "4/14/2023", "4/14/2023", "4/14/2023", "4/14/2023", "4/14/2023",
##
## "4/14/2023", "4/14/2023", "4/14/2023", "4/14/2023", "", "", "",
##
## "", "", "", "", "", "", "", "", "", "", "", "", "", "", ""),
##
## Field = c("Field 3", "Field 3", "Field 3", "Field 3", "Field 3",
##
## "Field 3", "Field 3", "Field 3", "Field 3", "Field 3", "Field 3",
##
## "Field 3", "", "", "", "", "", "", "", "", "", "", "", "",
##
## "", "", "", "", "", ""), TreeNumber = c(1L, 1L, 1L, 2L, 2L,
##
## 2L, 1L, 1L, 1L, 2L, 2L, 2L, NA, NA, NA, NA, NA, NA, NA, NA,
##
## NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), TreeType = c("Oak",
##
## "Oak", "Oak", "Oak", "Oak", "Oak", "Sycamore", "Sycamore",
##
## "Sycamore", "Sycamore", "Sycamore", "Sycamore", "", "", "",
##
## "", "", "", "", "", "", "", "", "", "", "", "", "", "", ""
##
## ), TreeAge = c("Old", "Old", "Old", "Old", "Old", "Old",
##
## "Old", "Old", "Old", "Old", "Old", "Old", "", "", "", "",
##
## "", "", "", "", "", "", "", "", "", "", "", "", "", ""),
##
## Sample.Location = c("Top", "Middle", "Bottom", "Top", "Middle",
##
## "Bottom", "Top", "Middle", "Bottom", "Top", "Middle", "Bottom",
##
## "", "", "", "", "", "", "", "", "", "", "", "", "", "", "",
##
## "", "", ""), SpeciesA = c(80L, 1L, 50L, 0L, 0L, 0L, 8L, 10L,
##
## 13L, 0L, 0L, 0L, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
##
## NA, NA, NA, NA, NA, NA, NA, NA), SpeciesB = c(7L, 16L, 6L,
##
## 0L, 0L, 0L, 5L, 11L, 2L, 0L, 0L, 0L, NA, NA, NA, NA, NA,
##
## NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), SpeciesC = c(17L,
##
## 23L, 2L, 0L, 0L, 0L, 5L, 4L, 0L, 0L, 0L, 0L, NA, NA, NA,
##
## NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA
##
## ), SpeciesD = c(19L, 5L, 1L, 0L, 0L, 0L, 13L, 15L, 78L, 0L,
##
## 0L, 0L, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
##
## NA, NA, NA, NA, NA)), class = "data.frame", row.names = c(NA,
##
## -30L))
##
## Someone recommended that my data was in the incorrect format, so they recommended that I try something like this:
## # reformat?
##
## treedata <- data1 %>% pivot_longer(cols = 7:10, names_to = "Species", values_to = "count")
##
## #
##
## treedata1$Species <- substr(treedata2$Species)
##
## This is the code I tried, but I don't think it works with the way I have the data set up?
## combined = treedata01|>
## summarize(across(starts_with("Species"), sum),
## .by = c("TreeType", "Sample Location", "Date")) |>
## summarize(across(starts_with("Species"), mean), .by = c("TreeType"))
##
## I am not sure, but I guess I am expecting to make a column of the value for each sampled tree, and somehow take the average of collected species we would expect to find per tree, and so on? I think that my data is in the wide-format form. |0 |0 |0 |jnpdx |2023-06-02 04:33:07Z |
## | 76386871|16 |0 |r,visual-studio |2023-06-02 02:58:20Z |Eden | 3878688|335 |I just got started with MS Visual Studio and am liking it. However, for work reasons I keep all my packages in a custom folder, which is not the default folder for packages. I have modified my .Rprofile file to set .libPaths() to this path so when I start R by itself it uses that path. How do I do that for MS Visual Studio?
## Please note, I am asking how I get R to automatically know where my packages are when it's started in Visual Studio, not how to issue .libPaths() whenever I start R in VS. If it matters, I'm using VS 1.78 (from 2023). |0 |2 |8 |NA |NA |
## | 76386843|47 |0 |r,dplyr |2023-06-02 02:49:28Z |rez | 16817141|238 |I want to first rename my x variable to newX and then mutate a new column name z based on values of newX and y. Putting rename and mutate in same chunk result in an error.
## Is there a way to use option 1 without an error?
## set.seed(1)
## dat <- data.frame(
## x = sample(c(0,1), size = 5, replace = TRUE),
## y = sample(c(0,1), size = 5, replace = TRUE)
## )
## # option 1
## dat %>%
## rename(newX = x) %>%
## mutate(z = rowSums(dat[c("newX", "y")] == 1))
##
## # option 2
## dat <- dat %>%
## rename(newX = x)
## dat %>%
## mutate(z = rowSums(dat[c("newX", "y")] == 1)) |0 |2 |12 |zephryl |2023-06-02 03:20:27Z, 2023-06-02 03:17:35Z |
## | 76386840|15 |0 |r,text,quanteda |2023-06-02 02:48:35Z |mgd_aus | 21250261|1 |I am tokenizing a corpus of text but am having issues with removing punctuation. It seems as though most punctuation is being removed, but not apostrophes.
## Code here:
## all.tokens <-
## tokens(
## my.corpus,
## remove_punct = TRUE,
## remove_numbers = TRUE,
## remove_symbols = TRUE,
## remove_url = TRUE
## ) %>% tokens_tolower() %>% tokens_select(pattern = stopwords("en"), selection = "remove")
##
## Anyone else have this issue? Any workarounds? Thanks! |0 |0 |2 |mgd_aus |2023-06-02 02:50:39Z |
## | 76386786|34 |0 |r,ggplot2,tidyverse,r-glue |2023-06-02 02:27:09Z |joshbrows | 21612376|65 |I would like to create a plot using plotly::ggplotly(). This works just fine.
## library(ggplot2)
## #> Warning: package 'ggplot2' was built under R version 4.2.2
##
## p <-
## mtcars |>
## ggplot() +
## geom_histogram(
## aes(
## x = disp,
## text = after_stat(count)
## )
## )
## #> Warning in geom_histogram(aes(x = disp, text = after_stat(count))): Ignoring
## #> unknown aesthetics: text
##
## plotly::ggplotly(p)
## #> `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
##
##
## There is a nice trick with ggplotly() that allows me to use dummy aesthetics to display in the tooltip. Below, I use the text aesthetic for this purpose. However, glue() doesn't work with after_stat(). How can I fix this?
## library(ggplot2)
## #> Warning: package 'ggplot2' was built under R version 4.2.2
##
## p <-
## mtcars |>
## ggplot() +
## geom_histogram(
## aes(
## x = disp,
## text = glue::glue('{after_stat(count)}')
## )
## )
## #> Warning in geom_histogram(aes(x = disp, text =
## #> glue::glue("{after_stat(count)}"))): Ignoring unknown aesthetics: text
##
## plotly::ggplotly(p)
## #> Error in after_stat(count): object 'count' not found |0 |0 |7 |NA |NA |
## | 76386732|26 |0 |r |2023-06-02 02:12:06Z |Stephen | 5411912|473 |Sometimes I highlight my R code, and then I click "Run", and it runs twice. It must be the way I am clicking, but is there some way to stop this? |0 |4 |11 |NA |NA |
## | 76386620|55 |0 |r,dplyr,tidy,rstatix |2023-06-02 01:26:55Z |sahuno | 7846884|129 |I have data dataframe with two groups, Tumor and Normal. for each site/row i want calculate fischer exact for using Methyl UnMethy between Tumor and Normal
## I'm looking for how to transform data to calculate fisher exact for each site using dplyr approach.
## methyl_dat <- data.frame(loci = c("site1", "site2", "site3", "site4"),
## Methy.tumor = c(50, 5, 60, 12),
## UnMethy.tumor = c(60, 0, 65, 5),
## Methy.Normal = c(13, 5, 22, 3),
## UnMethy.Normal = c(86, 0, 35, 3) )
##
## Here is Fischer exact strategy
## for site 1
## Normal
## Tumor Methyl UnMethy
## Methy 50 13
## UnMethy 60 86 |0 |0 |7 |sahuno |2023-06-02 01:48:24Z |
## | 76386429|31 |0 |r,web-scraping,rvest |2023-06-02 00:08:29Z |pparamo | 21958113|1 |Problem downloading pdfs from a website that stores them on AWS using rvest
## I am trying to download ~500 individual PDFs from this government webpage using rvest. The links on the site point to PDFs stored on a separate AWS site (for example this document).
## When I download the PDFs, I can't open them. I don't think I am actually downloading the linked PDFs from the AWS site. The links don't include a .pdf file type (e.g. https://getinvolved.mdba.gov.au/22346/widgets/139364/documents/47013) and I think I'm missing a step to download the actual PDFs.
## Here is a reproducible example
## #load packages
##
## library(tidyverse)
## library(rvest)
## library(polite)
##
## # scrape PDF links and names
##
## mdba_NB_url <- "https://getinvolved.mdba.gov.au/bp-amendments-submissions/widgets/139364/documents"
##
## session <- bow(mdba_NB_url, force = TRUE) # from the polite package, identify and respect any explicit limits
##
## NB_page <- scrape(session) # scrape the page contents
##
## download_links <- tibble(link_names = NB_page %>% #download links
## html_nodes("a")%>%
## html_text(),
## link_urls = NB_page %>%
## html_nodes("a") %>%
## html_attr('href'))
## #filter PDFs
##
## download_links_docs <- download_links %>%. #limit links to PDFs I need
## filter(str_detect(link_names, "No. [0-9]"))
##
## download_links_docs_subset <- download_links_docs %>%. #subset for test download
## slice(c(1:10))
##
## # Download PDFs
##
## my_urls <- download_links_docs_subset$link_urls
## save_here <- paste0(download_links_docs_subset$link_names, ".pdf")
## mapply(download.file, my_urls, save_here, mode = "wb") |0 |0 |2 |pparamo |2023-06-02 00:14:27Z |
## | 76386409|43 |0 |r,spline,gam |2023-06-02 00:02:27Z |Russ Conte | 7589075|122 |I'm working with the Boston Housing data set in the MASS package. The desired goal is something like this:
## library(MASS)
## library(tidyverse)
## library(gam)
## Boston.splines <- gam(medv ~ s(crime) + s(zn) + s(indus), data = Boston)
##
## I can get everything except the spline function to work automatically:
## names_Boston <- names(Boston[,1:4])
##
## f1 <- paste("medv ~", paste(names_Boston, collapse = "+"))
## f1 <- as.formula(f1)
## Boston1.gam <- gam(f1, data = Boston)
##
## But for the life of me I can't seem to get the s() function to be added to the front of each of the column names.
## I've tried dplyr and base R, nothing works. For example, this:
## set_names(paste0('s(', paste0(names_Boston), paste0')))
##
## returned an error message:
## Error: unexpected string constant in "set_names(paste0('s(', paste0(names_Boston), paste0')'"
##
## What is a way to automatically add the smoothing spline function to column names to result in a formula such as gam(medv ~ s(crime) + s(zn) + s(indus), data = Boston)? |0 |0 |6 |NA |NA |
## | 76386402|37 |0 |r,dplyr |2023-06-01 23:59:07Z |Hank | 4118306|51 |In our team, we rely heavily on the dplyr package in our R scripts. Recently, we encountered a compatibility? issue related to different versions of RStudio and dplyr.
## Some team members have the latest version of RStudio and dplyr (1.1.2), while others are using dplyr version 1.1.0. Interestingly, when team members with the newer version run the same code, they encounter a mutate error, while those with the older version do not face any issues.
## We suspect that this error might be due to the package version difference, but we wanted to share our experience and seek suggestions from the community.
## We did find a temporary solution by commenting out the code section that triggers the mutate error. This allows the code to run without any issues.
## Here is the part of the code that gives mutate error :
## SY_Flag = case_when(
## SY_Dup2 == T & lag(SY_Dup2, default = F) == T &
## Location == 'Sales' & lag(Location == 'Sales') &
## SY_Del !=T & lag (SY_Del, default = T) != T
## ~ T
## )
##
##
##
##
## Has anyone else experienced a similar compatibility problem between different versions of RStudio and dplyr? If so, how did you resolve it?
## Any advice or suggestions would be greatly appreciated. Thank you! |0 |0 |4 |Hank |2023-06-02 00:36:44Z |
## | 76386356|36 |-1 |r,optimization,dplyr,pattern-matching,grepl |2023-06-01 23:42:50Z |stevejoobs | 22003778|1 |How do I optimize this pattern matching function?
## I have a function that accepts a dataframe and a vector of strings called targets. The function returns
## a data frame that keeps only the rows which have at least one value that matches one of the targets. I'm running this function on dataframes with up to 28 million rows, so speed is critical.
## match_targets <- function(df, target_codes){
## pattern = paste0("^", target_codes, collapse = "|")
## df <- df %>%
## filter( rowSums( sapply( ., grepl, pattern) ) > 0)
## }
##
## I'm new to the dplyr library, so maybe there's some functions that I am not taking advantage of. Please let me know if there's any way to improve the performance of this function
## Tried on large data, expected faster runtime |0 |0 |0 |NA |NA |
## | 76386347|34 |0 |r,filter,datatable,transformation,logarithm |2023-06-01 23:41:09Z |Feynman | 21958995|13 |I'm trying to convert the values of a data table to its log(a)+1 (base 2) into a transformed data table in R. But the first two columns of the data tables are not numbers so I need to exclude them from the calculation or I will get an error. I do know how to do these two operations individually but when I combine them, I get an error saying the first two columns are not numerical values.
## Here is my code:
## logDT = log(DT, 2) + 1[, -c(1, 2)]
##
## I am aware that I can just create an intermediary data table that excludes the columns first and then produce the log table, but I do need the first two columns in there in the log table so I'd rather just find a way to make my original code work.
## Here's an example code (the only real difference between this and my actual code is that my actual code has two non-numerical columns while this code has one):
## DT = data.table(x=c("b","b","b","a","a"),v=rnorm(5))
## x v
## 1: b -0.4149946
## 2: b -0.3942900
## 3: b -0.0593134
## 4: a 1.1000254
## 5: a 0.7631757 |0 |0 |3 |Feynman |2023-06-02 00:02:14Z, 2023-06-02 03:04:23Z |
## | 76386335|34 |0 |r,dataframe,tidyr |2023-06-01 23:37:26Z |Louise | 7010408|23 |Lets say I have created the following data frame:
## set.seed(215)
## names <- rep(letters[1:3], 4)
## class <- sample(c("x","y","z"), 12, replace = TRUE)
## df <- data.frame(names,class)
##
## This gives me the following data frame:
## df
## names class
## 1 a z
## 2 b z
## 3 c x
## 4 a x
## 5 b y
## 6 c x
## 7 a x
## 8 b z
## 9 c y
## 10 a y
## 11 b z
## 12 c x
##
## I want to collapse the rows such that each combination of name and class only appears once. In my example data above, I would have this:
## df2
## names class
## 1 a z
## 2 a x
## 3 a y
## 4 b z
## 5 b y
## 6 c x
## 7 c y
##
## I have tried:
## new_df <- df %>%
## tidyr::pivot_wider(., names_from = names, values_from = class) %>%
## tibble::rownames_to_column() %>%
## pivot_longer(-rowname) %>%
## pivot_wider(names_from=rowname, values_from=value) %>%
## as.data.frame()
##
## But, this results in the following warning message:
## Warning message:
## Values from `class` are not uniquely identified; output will contain list-cols.
## ⢠Use `values_fn = list` to suppress this warning.
## ⢠Use `values_fn = {summary_fun}` to summarise duplicates.
## ⢠Use the following dplyr code to identify duplicates.
## {data} %>%
## dplyr::group_by(names) %>%
## dplyr::summarise(n = dplyr::n(), .groups = "drop") %>%
## dplyr::filter(n > 1L)
##
## The resulting df looks like this:
## name 1
## 1 a z, x, x, y
## 2 b z, y, z, z
## 3 c x, x, y, x
##
## Please help me identify the simplest way to do this. I am trying to figure out a solution using base R or tidyverse, but am grateful for any help.
## Thank you!
## *edited to correct formatting of desired result. |0 |0 |5 |Louise |2023-06-01 23:41:55Z |
## | 76386256|19 |0 |r,plotly |2023-06-01 23:12:22Z |ACE | 6718205|309 |I have a couple of plotly graphs in R that need to be exported to powerpoint, while retaining the interactivity. Is there a way to do this? I came across a solution for this that I am posting here. But the graph is just exporting as an image that does not have interactive features:
## https://community.rstudio.com/t/export-plotly-graphs-into-ms-powerpoint/33789/3
## Does this code need to be tweaked in any way?
## library(officer)
## library(magrittr)
## library(plotly)
## library(htmlwidgets)
## library(webshot)
##
## temp_file_proc <- tempfile(pattern = "", fileext = ".png")
## outout_file_proc <- here::here("presentation.pptx")
##
## df <- mtcars
## x <- plot_ly() %>% add_markers(data=mtcars, x=~wt, y=~disp)
##
## saveWidget(x, "temp.html")
## webshot("temp.html", temp_file_proc)
##
## doc <- read_pptx()
## doc <- add_slide(doc, layout = "Title and Content", master = "Office Theme")
## doc <- ph_with(x = doc, value = "Title of the PPT", location = ph_location_type(type = "title"))
## image_add <- external_img(temp_file_proc, width = 5, height = 5)
## doc <- ph_with(x = doc, image_add,
## location = ph_location(left = 2.5, top = 2), use_loc_size = FALSE)
##
## print(doc, target = outout_file_proc) |0 |2 |12 |NA |NA |
## | 76386162|33 |0 |r,merge,mutate |2023-06-01 22:48:53Z |Kate | 21996498|31 |so I basically have two dataframes mutated from a single dataframe 'original_data':
## I used the mutate function on 'original data' to keep only values <= 600 seconds, all the other values would return as NA, this is stored in 'new_data_one'.
## I then used the mutate function on 'original data' again to keep a single value >600, all the other values would return as NA, this is stored in 'new_data_two'.
## Both of these dataframe retained their structure, so something like this:
## 'original_data'
## 1 2 3 4 5
## R1 80 98 150 599 650
## R2 90 95 254 725 905
## R3 93 96 540 650 780
##
## 'new_data_one'
## 1 2 3 4 5
## R1 80 98 150 599 NA
## R2 90 95 254 NA NA
## R3 93 96 540 NA NA
##
## 'new_data_two'
## 1 2 3 4 5
## R1 NA NA NA NA 650
## R2 NA NA NA 725 NA
## R3 NA NA NA 650 NA
##
## How do I merge the two mutated dataframes ('new_data_one' and 'new_data_two'), so I get an output of something like this (that essentially retains the same col/row structures)?
## 'desired_merged_dataframe'
## 1 2 3 4 5
## R1 80 98 150 599 650
## R2 90 95 254 725 NA
## R3 93 96 540 650 NA |0 |0 |3 |NA |NA |
## | 76386109|16 |0 |r,statistics |2023-06-01 22:31:40Z |Jenny | 21634088|1 |I currently am trying to make a regression in R to compare my AB test for a film recommender results to see which one performs better. Currently, I am opting for a fixed effects model (plm). However, the issue I run into is that I want to control for a lot of categorical variables as it is movie data.
## For example, I need to account for user fixed effects (some users watch more movies than other in general), content fixed effects (some movies may be watched more than others), time (some days movies are watched more than others).
## I tried to run this in my regression, and I receive as output a long list of every user. It makes sense, but that's not really something I can compare in the end...
## Any ideas?
## I tried using the plm function, to create a fixed effects model. My function looks like this:
## watched_seconds ~ groupB, genre, time, language, user
##
## I expected a regression that shows the coefficient of groupB being higher or lower than groupA. |0 |0 |0 |neilfws |2023-06-01 22:33:03Z |
## | 76386104|24 |0 |r,ggplot2,likert |2023-06-01 22:30:53Z |Mauro r | 2009260|15 |Grouped likert scale using Likert package in R, results in plot with reversed scale.
## I have series of likert variables (strongly disagree-strongly agree) and a grouping variable from a survey. I am using Likert package in R and everything goes well until I want to plot one of those likert variables grouped by a grouping variable in a 100% stacked bars plot.
## In likert package to plot 100% stacked bar you can use the option centered=FALSE. This option works fine when plotting without grouping.
## In the following code you can see the problem arising in the 'L3' plot, in that the bars are inverted on the x axis compared to 'L2'. The blue color should be on the right and the yellows on the left. Note also that the percentages for neutral are not aligned, but they would be aligned if the bars were in the correct sense
## library(likert)
## library(tidyverse)
##
## n <- 3000
##
## set.seed(123)
##
## # Dataframe where Q1a,b,c are the likert variables with levels 1,2,3,4,5 and 3 is the centre (neutral answer)
## some_made_up_data <- data.frame(
## Q1a = as.factor(sample(c(1,2,3,4,5), n, replace = TRUE, prob = c(.03,.07,.2,.4, .3))),
## Q1b = as.factor(sample(c(1,2,3,4,5), n, replace = TRUE, prob = c(.02,.1,.2,.3, .3))),
## Q1c = as.factor(sample(c(1,2,3,4,5), n, replace = TRUE, prob = c(.05,.2,.2,.4, .2))),
## group = as.factor(sample(c("g1", "g2", "g3", "g4", "g5"), n, replace = TRUE))
## )
##
## # Simple Plot 100% stacked bars
## L1<-likert(some_made_up_data[,-4]) %>%
## plot(type="bar",
## centered=F
## )
##
## # Plot with grouping, standard divergent bars
## L2<-likert(some_made_up_data[,1,drop=FALSE],grouping = some_made_up_data$group) %>%
## plot(type="bar",
## centered=T
## )
##
##
## # Plot with grouping, 100% stacked bars
## ## Here I am subsetting the dataframe to use only the first of the likert variables and compare distribution s across groups
##
## L3<-likert(some_made_up_data[,1,drop=FALSE],grouping = some_made_up_data$group) %>%
## plot(type="bar",
## centered=F
## )
##
## # View plots
## L1
## L2
## L3
##
## I tried to play with the options of likert.options in the likert package, but without successAlso I cannot find any solution by searching online for simlar problem.
## I am expecting the bars representing the frequencies of the likert levels to be in the correct sense, lower likert values ont he left and high likert values on the right as in 'L2' plot.
## Any suggestion is very appreciated. Thank you. |0 |0 |4 |NA |NA |
## | 76386043|42 |0 |r,p-value |2023-06-01 22:12:05Z |Lara | 15804062|115 |I do have the following dataframe:
## df <- data.frame(
## Assay = c("Gene1", "Gene2", "Gene3"),
## DT1 = c(1,2,3),
## DT2 = c(4,5,6),
## DT3 = c(4,5,6),
## DT4 = c(0,8,7),
## DT5 = c(-1,2,5),
## DT6 = c(4,5,3),
## DT7 = c(5,2,9),
## DT8 = c(0,0,4),
## DT9 = c(3,6,2),
## DT10 = c(5,9,1),
## DT11 = c(2,3,4),
## DT12 = c(8,1,6)
## )
##
## And I would like to create a column that will contain p-values for groups compared row by row. First 5 columns (2:6) versus Next 7 columns (7:13)
## # Perform t-tests row-wise and obtain p-values
## p_values <- apply(df[, 2:6], 1, function(row) {
## t_test_result <- t.test(row, df[, 7:13])
## t_test_result$p.value
## })
##
## # Add the p-values column to the dataframe
## df$p_values <- p_values
## df
##
## For the first row when I use this script I have a p-value of 0.09335115 while if I do it manually:
## t.test(c(1,4,4,0,-1),
## c(4,5,0,3,5,2,8))
##
## I do have a p-value of 0.1425
## What's the issue? |0 |0 |7 |NA |NA |
## | 76386018|19 |0 |r,tidyverse |2023-06-01 22:05:43Z |MNK2008- | 18129490|33 |The first error I get is from the index(.))) line. Which is object '.' not found. When I put '.' into the index line I get an error in the spread(asset, return), which is Each row of output must be identified by a unique combination of keys. Please find code and spread error:
## # Converting Daily Prices to Monthly Returns in the tidyverse
## asset_returns_dplyr =
## prices |>
## to.monthly(indexAt = "lastof", OHLC = FALSE) |>
## # convert the index to a date
## data.frame(date = index(.)) |>
## # now remove the index because it got converted to row names
## remove_rownames() |>
## gather(asset, prices, -date) |>
## group_by(asset) |>
## mutate(returns = (log(prices) - log(lag(prices)))) |>
## select(-prices) |>
## spread(asset, returns) |>
## select(date, symbols)
## na.omit()
##
## Error in `spread()`:
## ! Each row of output must be identified by a unique combination of keys.
## â¹ Keys are shared for 625 rows
## ⢠501, 502, 503, 504, 505, 506, 507, 508, 509, 510, 511, 512, 513, 514, 515, 516, 517,
## 518, 519, 520, 521, 522,
## 523, 524, 525, 526, 527, 528, 529, 530, 531, 532, 533, 534, 535, 536, 537, 538, 539, 5
## 540, 541, 542, 543, 544,
## 545, 546, 547, 548, 549, 550, 551, 552, 553, 554, 555, 556, 557, 558, 559, 560, 561,
## 562,
##
## structure(c(117.827011108398, 120.846908569336, 120.57389831543,
## 121.103385925293, 120.772438049316, 120.424964904785,
## 120.73104095459,
## 121.690849304199, 121.682556152344, 121.59984588623,
## 42.5093727111816,
## 43.1672821044922, 42.7486114501953, 42.9654159545898,
## 42.7785224914551,
## 42.5392799377441, 42.7560882568359, 43.3018455505371,
## 43.3915596008301,
## 43.4438934326172, 34.7362937927246, 35.6893730163574,
## 35.6421546936035,
## 35.9212112426758, 35.7108459472656, 35.5734710693359,
## 35.6765060424805,
## 35.7194404602051, 35.693675994873, 35.6850929260254,
## 35.7743263244629,
## 36.4761009216309, 36.2179832458496, 36.2905693054199,
## 36.0163269042969,
## 35.6936645507812, 35.8469200134277, 36.1615180969238,
## 35.8711166381836,
## 36.0243835449219, 86.7947540283203, 86.6931762695312,
## 86.4743957519531,
## 86.5681304931641, 86.5212860107422, 86.5993957519531,
## 86.5369033813477,
## 86.5290603637695, 86.6150131225586, 86.6931762695312), class =
## c("xts",
## "zoo"), src = "yahoo", updated = structure(1685658877.66735, class
## = c("POSIXct",
## "POSIXt")), index = structure(c(1356912000, 1357084800, 1357171200,
## 1357257600, 1357516800, 1357603200, 1357689600, 1357776000,
## 1357862400,
## 1358121600), tzone = "UTC", tclass = "Date"), dim = c(10L, 5L
## ), dimnames = list(NULL, c("SPY", "EFA", "IJS", "EEM", "AGG"))) |0 |0 |4 |MNK2008- |2023-06-02 00:08:05Z |
## | 76385903|40 |0 |r,random,datatable,sample |2023-06-01 21:42:41Z |Feynman | 21958995|13 |I'm trying to randomly select 50 columns in a data table in R (the original table has 110 columns and 1000+ rows). But when I run the code, the resulting table only has 50 rows (I am not trying to filter out the rows, only the columns).
## randomTable = sample_n(ogTable, 50, axis = 'columns')
## I looked up this issue and it seems like this function doesn't display all rows if it exceeds the number of resulting columns, but I could not find a way to get around this. |0 |0 |3 |NA |2023-06-01 22:27:43Z, 2023-06-01 22:14:44Z |
## | 76385735|50 |1 |r,interpolation,tidyr |2023-06-01 21:11:36Z |Calvin | 22003198|11 |I am attempting to interpolate (linearly or by other methods) between values in an R data frame. The values are grouped by a date_index in a very large dataframe, and the interpolation is between the values on a daily basis based on the f_dates. The number of f_dates changes so the interpolation, presumably, needs to be grouped for each date_index and then ungrouped into a larger data frame? See example below.
## My attempt at the code is as follows, but I can't seem to get the grouping and interpolation to work.
## df <- data.frame("date_index"=c("20230104", "20230104", "20230104", "20230106", "20230106", "20230106"),
## "f_date"=c("20230201", "20230203", "20230210", "20230201", "20230203", "20230210"),
## "value" = c(1.50, 2.00, 3.25, 4.00, 3.00, 2.50)) %>%
## mutate(date_index=ymd(date_index), f_date=ymd(f_date))
##
## df %>%
## group_by(date_index) %>%
## arrange(f_date)
##
## date_first <- first(df$f_date)
## date_last <- last(df$f_date)
##
## ApproxFun <- approxfun(x = df$f_date, y = df$value)
##
## date_seq <- seq.Date(ymd(date_first), ymd(date_last), by = 1)
##
## LinearFit <- ApproxFun(date_seq)
##
## # the interpolated dataframe
## dfi <- data.frame(f_date = date_seq, value = LinearFit)
##
##
## The output I was looking for should be as follows:
## date_index f_date value
## 2023-01-04 2023-02-01 1.50
## 2023-01-04 2023-02-02 1.75
## 2023-01-04 2023-02-03 2.00
## 2023-01-04 2023-02-04 2.18
## 2023-01-04 2023-02-05 2.36
## 2023-01-04 2023-02-06 2.54
## 2023-01-04 2023-02-07 2.71
## 2023-01-04 2023-02-08 2.89
## 2023-01-04 2023-02-09 3.07
## 2023-01-04 2023-02-10 3.25
## 2023-01-06 2023-02-01 4.00
## 2023-01-06 2023-02-02 3.50
## 2023-01-06 2023-02-03 3.00
## 2023-01-06 2023-02-04 2.93
## 2023-01-06 2023-02-05 2.86
## 2023-01-06 2023-02-06 2.79
## 2023-01-06 2023-02-07 2.71
## 2023-01-06 2023-02-08 2.64
## 2023-01-06 2023-02-09 2.57
## 2023-01-06 2023-02-10 2.50 |0 |0 |3 |Dave2e |2023-06-01 23:08:08Z |
## | 76385712|42 |1 |r,ggplot2,width,boxplot |2023-06-01 21:07:18Z |hollyrose54 | 21120951|15 |I am struggling with boxplots in R studio. I have a data set that looks like this:
##
## Site
## date
## Chloride_mgL
## B
## 2021-06-08 12:00:00
## 29.13800
## D
## 2021-06-08 12:00:00
## 29.41100
## D
## 2021-06-15 12:00:00
## 29.92400
## E
## 2021-06-08 12:00:00
## 29.29100
## A
## 2021-06-08 12:00:00
## 29.16800
## D
## 2021-06-22 12:00:00
## 32.79500
## C
## 2021-06-22 12:00:00
## 30.59200
##
## Site is characters, date is POSIXct, and Chloride is numeric. I need to make box plots with date on the x axis and Chloride on the y axis, grouped by date. So, for example, I need a box on 2021-06-08 12:00:00 that is made of all of the Chloride values from all of the sites on that date/time. I can get that made, but for some reason when I make it the boxes are tiny and I need them to be bigger, but the width = function just makes the boxes into skinny lines, and not boxes at all.
## This is how I make the box plots:
## c <- ggplot(sylvan[sylvan$Site %in% c("A", "B", "C", "D", "E", "F", "G") & sylvan$Chloride_mgL > 25, ], aes(x = date, y = Chloride_mgL, group = date)) +
## geom_boxplot(outlier.shape = NA, color = "dark blue") + theme_minimal() + labs(y ="Chloride (mg/L)")
## c
##
## Which gives this:
##
## Great, but I want bigger boxes. So I try to use the width argument:
## c <- ggplot(sylvan[sylvan$Site %in% c("A", "B", "C", "D", "E", "F", "G") & sylvan$Chloride_mgL > 25, ], aes(x = date, y = Chloride_mgL, group = date)) +
## geom_boxplot(width = 2, outlier.shape = NA, color = "dark blue") + theme_minimal() + labs(y ="Chloride (mg/L)")
## c
##
## And then it turns into this:
##
## How can I make the boxes wider instead of just turning into skinny little lines? Thanks so much!
## Edit: Tried @TarJae solution, and it turned out like this:
## enter image description here |0 |0 |3 |hollyrose54 |2023-06-01 21:53:37Z, 2023-06-01 23:16:17Z, 2023-06-01 23:13:03Z |
## | 255697|2k |5 |math,r,statistics,bayesian,dirichlet |2008-11-01 15:48:30Z |Alex Coventry | 1941213|68,251 |I'm looking for a an R package which can be used to train a Dirichlet prior from counts data. I'm asking for a colleague who's using R, and don't use it myself, so I'm not too sure how to look for packages. It's a bit hard to search for, because "R" is such a nonspecific search string. There doesn't seem to be anything on CRAN, but are there any other places to look? |4 |36 |40 |NA |NA |
## | 127137|25k |24 |r |2008-09-24 13:31:07Z |kaybenleroll | 277|16,784 |As it currently stands, this question is not a good fit for our Q&A format. We expect answers to be supported by facts, references, or expertise, but this question will likely solicit debate, arguments, polling, or extended discussion. If you feel that this question can be improved and possibly reopened, visit the help center for guidance.
##
##
##
##
##
## Closed 11 years ago.
##
##
##
## Does anyone know a good online resource for example of R code?
##
## The programs do not have to be written for illustrative purposes, I am really just looking for some places where a bunch of R code has been written to give me a sense of the syntax and capabilities of the language?
##
## Edit: I have read the basic documentation on the main site, but was wondering if there was some code samples or even programs that show how R is used by different people. |16 |53 |66 |NA |2013-04-12 00:18:57Z, 2009-07-23 18:50:32Z, 2009-07-26 00:56:59Z, 2010-02-18 12:38:47Z, 2009-07-26 20:49:26Z, 2009-07-23 18:31:12Z, 2010-02-28 11:17:12Z, 2009-07-26 07:31:19Z |
## | 103312|5k |7 |r,file,file-io,eof |2008-09-19 16:09:26Z |Gregg Lind | NA|20.6k |How can I test for the EOF flag in R?
##
## For example:
##
## f <- file(fname, "rb")
## while (???) {
## a <- readBin(f, "int", n=1)
## } |0 |0 |0 |Unheilig |2015-04-23 05:16:26Z |
## | 102056|16k |133 |r,search,r-faq |2008-09-19 14:03:25Z |user14008 | NA|3,591 |Closed. This question is off-topic. It is not currently accepting answers.
##
##
##
##
##
##
##
##
##
##
##
## Want to improve this question? Update the question so it's on-topic for Stack Overflow.
##
##
## Closed 10 years ago.
##
##
##
##
##
##
##
##
## Improve this question
##
##
##
##
## "The Google" is very helpful... unless your language is called "R," in which case it spits out tons of irrelevant stuff.
##
## Anyone have any search engine tricks for "R"? There are some specialized websites, like those below, but how can you tell Google you mean "R" the language? If I'm searching for something specific, I'll use an R-specific term, like "cbind." Are there other such tricks?
##
## rweb.stat.umn.edu
## www.rseek.org
## search.r-project.org
## www.dangoldstein.com/search_r.html |0 |0 |0 |Joris Meys |2012-05-21 09:57:35Z, 2017-05-23 12:09:51Z |
## | 95007|36k |79 |math,r,statistics |2008-09-18 17:59:19Z |Liu Chang | 15842|20,550 |I've been mystified by the R quantile function all day.
##
## I have an intuitive notion of how quantiles work, and an M.S. in stats, but boy oh boy, the documentation for it is confusing to me.
##
## From the docs:
##
##
## Q[i](p) = (1 - gamma) x[j] + gamma
## x[j+1],
##
##
## I'm with it so far. For a type i quantile, it's an interpolation between x[j] and x [j+1], based on some mysterious constant gamma
##
##
## where 1 <= i <= 9, (j-m)/n <= p <
## (j-m+1)/ n, x[j] is the jth order
## statistic, n is the sample size, and m
## is a constant determined by the sample
## quantile type. Here gamma depends on
## the fractional part of g = np+m-j.
##
##
## So, how calculate j? m?
##
##
## For the continuous sample quantile
## types (4 through 9), the sample
## quantiles can be obtained by linear
## interpolation between the kth order
## statistic and p(k):
##
## p(k) = (k - alpha) / (n - alpha - beta
## + 1),
## where α and β are constants determined
## by the type. Further, m = alpha + p(1
## - alpha - beta), and gamma = g.
##
##
## Now I'm really lost. p, which was a constant before, is now apparently a function.
##
## So for Type 7 quantiles, the default...
##
##
## Type 7
##
## p(k) = (k - 1) / (n - 1). In this case, p(k) = mode[F(x[k])]. This is used by S.
##
##
## Anyone want to help me out? In particular I'm confused by the notation of p being a function and a constant, what the heck m is, and now to calculate j for some particular p.
##
## I hope that based on the answers here, we can submit some revised documentation that better explains what is going on here.
##
## quantile.R source code
## or type: quantile.default |15 |66 |81 |sth |2009-08-03 04:56:08Z, 2020-06-20 09:12:55Z, 2019-11-07 22:29:19Z |
## | 79709|1k |5 |memory,r,function,global-variables,side-effects |2008-09-17 03:39:16Z |James Lavin | NA|0 |I have a function inside a loop inside a function. The inner function acquires and stores a large vector of data in memory (as a global variable... I'm using "R" which is like "S-Plus"). The loop loops through a long list of data to be acquired. The outer function starts the process and passes in the list of datasets to be acquired.
##
## for (dataset in list_of_datasets) {
## for (datachunk in dataset) {
## <process datachunk>
## <store result? as vector? where?>
## }
## }
##
##
## I programmed the inner function to store each dataset before moving to the next, so all the work of the outer function occurs as side effects on global variables... a big no-no. Is this better or worse than collecting and returning a giant, memory-hogging vector of vectors? Is there a superior third approach?
##
## Would the answer change if I were storing the data vectors in a database rather than in memory? Ideally, I'd like to be able to terminate the function (or have it fail due to network timeouts) without losing all the information processed prior to termination. |0 |0 |0 |smci |2016-04-28 13:21:33Z, 2015-04-16 04:07:55Z |
## | 77434|474k |359 |r,dataframe,vector |2008-09-16 21:40:29Z |James Lavin | 14008|0 |Suppose I have a vector that is nested in a dataframe with one or two levels. Is there a quick and dirty way to access the last value, without using the length() function? Something ala PERL's $# special var?
## So I would like something like:
## dat$vec1$vec2[$#]
##
## instead of:
## dat$vec1$vec2[length(dat$vec1$vec2)] |2 |18 |3 |Quinten |2023-01-01 14:54:35Z, 2019-07-31 05:11:22Z, 2017-06-17 22:25:14Z, 2019-07-31 05:17:56Z, 2016-11-29 23:48:04Z, 2016-11-29 23:48:43Z, 2017-05-03 13:09:45Z |
##
##
## | ParentId|body |author |PostTime |Reputation | BronzeMedals| SilverMedals| GoldMedals|
## |--------:|:-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:-----------------------|:--------------------|:----------|------------:|------------:|----------:|
## | 76397805|what about:
## library(geosphere)
## library(dplyr)
##
## the_distances <-
## expand.grid(name_1 = df_1$name_1, name_2 = df_2$name_2) |>
## left_join(df_1 |> mutate(coords_1 = cbind(lon, lat)), by = 'name_1') |>
## left_join(df_2 |> mutate(coords_2 = cbind(lon, lat)), by = 'name_2') |>
## rowwise() |>
## mutate(hav_dist = distHaversine(coords_1, coords_2)) |>
## select(c(starts_with('name_'), hav_dist))
##
## ## > the_distances |> head()
## ## # A tibble: 6 x 3
## ## # Rowwise:
## ## name_1 name_2 hav_dist
## ## <chr> <chr> <dbl>
## ## 1 john matthew 1564.
## ## 2 david matthew 1903.
## ## 3 alex matthew 2028.
## ## ...
##
## the_distances |>
## group_by(name_1) |>
## summarize(min = min(hav_dist),
## ave = mean(hav_dist),
## max = max(hav_dist)
## )
##
## ## # A tibble: 15 x 4
## ## name_1 min ave max
## ## <chr> <dbl> <dbl> <dbl>
## ## 1 alex 354. 1361. 3108.
## ## 2 chris 1477. 2607. 3599.
## ## 3 david 302. 1519. 2678.
## ## 4 henry 1289. 2541. 3935.
## ## 5 john 880. 1862. 2701.
## ## ... |I_O |2023-06-03 21:03:32Z |2755 | 14| 2| 0|
## | 76397627|We can use fct_inorder here:
## ggplot will order x axis alphabetically. To get the order in your table use fct_inorder:
## library(ggplot2)
## library(forcats)
## library(dplyr)
##
## de %>%
## mutate(Mes = fct_inorder(Mes)) %>%
## ggplot(aes(fill = Cidade, y = Leitura, x = Mes)) +
## geom_bar(position = 'dodge', stat = 'identity') |TarJae |2023-06-03 19:08:42Z |70408 | 62| 18| 6|
## | 76397512|It's simply
## p + guides(size = guide_legend(order = 1),
## color = guide_colorbar(order = 2)) |Allan Cameron |2023-06-03 18:46:52Z |138808 | 80| 42| 7|
## | 76397435|R is vectorized and this can be done without loops at all.
## Note that nums <- seq_len(3) is an alternative way of creating the vector nums.
## nums <- 1:3
## row <- setNames(nums, paste0("Column", nums))
## as.data.frame(t(row))
## #> Column1 Column2 Column3
## #> 1 1 2 3
##
## Created on 2023-06-03 with reprex v2.0.2 |Rui Barradas |2023-06-03 18:24:05Z |68610 | 63| 32| 8|
## | 76396915|As stated in the comments, your question is more about having a custom summary function.
## Note that what you called "induvidual model summary, foo$model" is not a summary (as in output for a summary function), is just the default printing method of a rms object.
## You can use summary.lm():
## foo %>%
## {map(.$model, summary.lm)}
##
## Extra: if you don't want your models saved as a tibble, you can use group_split -> map, instead of group_by -> nest -> map:
## foo <- mtcars %>%
## group_split(cyl) %>%
## map(~ rms::ols(mpg ~ hp + wt, data = .x))
##
## Now, foo is a list and you can do more naturally:
## map(foo, summary.lm) |Ricardo Semião e Castro |2023-06-03 17:40:13Z |3983 | 27| 8| 0|
## | 76396811|Based on the latest version of R (conveniently available on a Github mirror of the development SVN repository), the probability function for the one-sided one-sample exact test is short and coded in R — which is convenient for you, as you can't call any components of R in a CRAN package other than (1) exported/publicly available R functions or (2) C functions that are defined in R.h or other header files - from the CRAN repository policy,
##
## CRAN packages should use only the public API. Hence they should not use entry points not declared as API in installed headers nor .Internal() nor .Call() etc calls to base packages. Also, ::: should not be used to access undocumented/internal objects in base packages (nor should other means of access be employed).
##
## Here's the code from the link above:
## pkolmogorov_one_exact <- function(q, n, lower.tail = TRUE) {
## ## Probability function for the one-sided one-sample Kolmogorov
## ## statistics, based on the formula of Birnbaum & Tingey (1951).
## j <- seq.int(from = 0, to = floor(n * (1 - q)))
## p <- q * sum(exp(lchoose(n, j)
## + (n - j) * log(1 - q - j / n)
## + (j - 1) * log(q + j / n)))
## if(lower.tail) 1 - p else p
## }
##
## You can copy this code into your package (although you may need to be careful if your code is licensed in some way other than GPL); it would be polite but is not legally required to acknowledge where it came from. |Ben Bolker |2023-06-03 20:45:24Z |208153 | 451| 368| 25|
## | 76396563|I guess the trick is to convert your data to a spatial format {GWmodel} can process. Please adapt below example to suit your purpose:
## get spatial data; I used {rnaturalearth} to obtain the province (=state) borders from Natural Earth, and I retrieved it as an sf-class object as I find simple features more convenient to work with.
## library(dplyr)
## library(rnaturalearth)
## provinces <- ne_states(country = 'Morocco', returnclass = 'sf')
##
##
## ## keep only name and geometry:
## provinces <- provinces |> select(name)
##
## ## > provinces
## ## Simple feature collection with 16 features and 1 field
## ## Geometry type: POLYGON
## ## Dimension: XY
## ## Bounding box: xmin: -17.01374 ymin: 21.41997 xmax: -1.031999 ymax: 35.92652
## ## CRS: +proj=longlat +datum=WGS84 +no_defs +ellps=WGS84 +towgs84=0,0,0
## ## First 10 features:
## ## name geometry
## ## 58 Guelmim - Es-Semara POLYGON ((-8.817035 27.6614...
## ## 60 Laâyoune - Boujdour - Sakia El Hamra POLYGON ((-12.05582 25.9958...
##
## join attributes (model parameters) from other sources (I created some dummy data instead):
## provinces <-
## provinces |>
## mutate(geometry = st_centroid(geometry),
## x1 = rnorm(16), # predictor 1
## x2 = rnorm(16), # predictor 2
## Y = rbinom(16, 1, .5) # binomial outcome
## )
##
## convert object provinces from class sf to class sp, which is required by {GWmodel} functions:
## provinces <- as_Spatial(provinces)
##
## done (technically at least):
## the_model <- ggwr.basic(Y ~ x1 + x2,
## data = provinces,
## bw = 50, ## I just put a fixed bandwith here
## family ="binomial"
## )
##
## ## > summary(the_model)
## ## Length Class Mode
## ## GW.arguments 11 -none- list
## ## GW.diagnostic 4 -none- list
## ## glms 22 -none- list
## ## SDF 16 SpatialPointsDataFrame S4
## ## CV 16 -none- numeric
## ## timings 2 -none- list
## ## this.call 5 -none- call |I_O |2023-06-03 20:12:59Z |2755 | 14| 2| 0|
## | 76396351|I realized that the problem was that R Raster cannot read the Coordinates of sentinel data from the very beginning which messes up the merging of layers or stitching of different location. |Johnny |2023-06-03 14:45:17Z |1 | 0| 0| 0|
## | 76396291|I have got an answer after researching into it.
## Since plot works well with ctree() objects, I compared the extracted tree from cforest and the tree generated by ctree() and found the following difference in their data structure.
## For ctree object, which can be plotted:
## $ fitted:'data.frame': 150 obs. of 3 variables:
## ..$ (fitted) : int [1:150] 2 2 2 2 2 2 2 2 2 2 ...
## ..$ (weights) : num [1:150] 1 1 1 1 1 1 1 1 1 1 ...
## ..$ (response): Factor w/ 3 levels "setosa","versicolor",..: 1 1 1 1 1 1 1 1 1 1 ...
##
## but for a tree from cforest() result, which cannot be plotted:
## $ fitted:'data.frame': 150 obs. of 4 variables:
## ..$ idx : int [1:150] 1 2 3 4 5 6 7 8 9 10 ...
## ..$ (response): Factor w/ 3 levels "setosa","versicolor",..: 1 1 1 1 1 1 1 1 1 1 ...
## ..$ (weights) : int [1:150] 0 1 1 1 1 1 1 1 0 0 ...
## ..$ (fitted) : int [1:150] 2 2 2 2 2 2 2 2 2 2 ...
##
## Please note that the variables (response), (weights) and (fitted) are in different columns in the data structure of "fitted" dataframe in these two trees.
## Therefore, I use the following manipulation to adjust the "fitted" dataframe structure in the cforest tree object, and plotting is successful:
## library(partykit)
## cf <- cforest(Species ~., data=iris)
## tr <- gettree(cf, tree=1)
## nfitted <- data.frame(tr$fitted$`(fitted)`,tr$fitted$`(weights)`,tr$fitted$`(response)`)
## colnames(nfitted) <- c('(fitted)', '(weights)', '(response)')
## tr$fitted <- nfitted
## plot(tr)
##
## Hope this will help those who encounter the same problem with the plotting of trees from cforest() in the partykit package. |Junjie HE |2023-06-03 15:47:49Z |1 | 0| 0| 0|
## | 76396165|from the documentation of st_make_grid:
##
## Create a square or hexagonal grid covering the
## bounding box of the geometry of an sf or sfc object
##
## so you need to convert your dataframe of point coordinates to an sf-object "the_points" (and reproject to a projection accepting metric length units):
## library(sf)
##
## the_points <-
## st_sf(geometry = DF[c('lon', 'lat')] |>
## as.matrix() |>
## st_multipoint() |>
## st_sfc() |>
## st_cast('POINT'),
## crs = 4326 ## geographic data (in degrees)
## ) |>
## ## convert to projected coordinates (to specify dimensions in m
## ## take Google Mercator as first guess (EPSG-code 3857)
## st_transform(3857)
##
## create grid (note that your points have only about 100 m latitudinal range):
## the_grid <-
## st_make_grid(n = c(10, 1), cellsize = 7e3 ## 7000 km)
##
## inspect result:
## plot(the_grid)
## plot(the_points, add = TRUE) |I_O |2023-06-03 14:29:08Z |2755 | 14| 2| 0|
## | 76395825|In this examples all matrix variables of the current environment are converted to numeric.
## See the warning in the case where matrix cannot be converted to numeric.
## var1 <- matrix(1:10, 5, 2)
## var2 <- matrix(as.character(5:13), 3,3)
## var3 <- letters[1:5]
## var4 <- matrix(letters[1])
##
## print(sapply(mget(ls()), typeof))
## #> var1 var2 var3 var4
## #> "integer" "character" "character" "character"
##
## for (i in ls()[sapply(mget(ls()), is.matrix)])
## assign(i, as.numeric(get(i)))
## #> Warning in assign(i, as.numeric(get(i))): NAs introduced by coercion
##
## print(sapply(mget(ls()), typeof))
## #> i var1 var2 var3 var4
## #> "character" "double" "double" "character" "double"
##
## Created on 2023-06-03 with reprex v2.0.2 |Ric |2023-06-03 14:36:42Z |5177 | 23| 10| 0|
## | 76395682|You can set up key columns for lat / long in each data frame by multiplying by 1000 and rounding. Next add an index column in DF1 using its row numbers. You can then do a left join on the lat / long key columns and filter out the unique indices from DF1. Finally drop the unused columns and rename the resulting columns as you see fit:
## library(tidyverse)
##
## DF2 %>%
## mutate(key_lat = as.integer(B_Lat * 1000),
## key_long = as.integer(B_Long * 1000)) %>%
## left_join(DF1 %>%
## mutate(key_lat = as.integer(A_Lat * 1000),
## key_long = as.integer(A_Long * 1000),
## rownum = row_number()),
## by = c("key_lat", "key_long")) %>%
## filter(!duplicated(rownum)) %>%
## select(B_Lat, B_Long, B_Speed, A_Speed) %>%
## rename(R_Lat = B_Lat, R_Long = B_Long,
## R_B_Speed = B_Speed, R_A_Speed = A_Speed)
##
## #> R_Lat R_Long R_B_Speed R_A_Speed
## #> 1 5.66781 2.44323 2345 1345
## #> 2 5.66783 2.44534 4551 3551
## #> 3 5.66783 2.44534 4551 3551
## #> 4 5.66783 2.44534 4551 3551 |Allan Cameron |2023-06-03 11:23:42Z |138808 | 80| 42| 7|
## | 76395682|one approach would be to use the first four significant digits (signif) and {dplyr} for convenience:
## library(dplyr)
##
## DF1 |>
## left_join(DF2 |>
## mutate(across(contains(c('Lat', 'Long')),
## ~ signif(.x, 4),
## .names = '{.col}_Short')
## ),
## by = c(A_Lat = 'B_Lat_Short')
## ) |>
## select(-ends_with('_Short'))
##
## A_Lat A_Long A_Speed B_Lat B_Long B_Speed
## 1 5.667 2.443 1345 5.66743 2.44523 5551
## 2 5.667 2.443 1345 5.66723 2.44512 7551
## 3 5.667 2.443 1345 5.66734 2.44512 7551
## 4 5.667 2.443 1345 5.66712 2.44543 8551
## ## ...
##
## working with spatial data, you could also use a spatial join (by proximity within an allowable distance), e. g. with {sf}.
## library(sf)
##
## ## convert DF1 and DF2 to spatial dataframes in one go (using `Map`)
## spatial_DFs <-
## c('DF1', 'DF2') |>
## Map(f = \(df_name){
## get(df_name) |>
## rowwise() |>
## mutate(geometry = (\(...) st_sfc(st_point(unlist(...))))
## (c_across(contains(c('_Lat', '_Long'))))
## ) |>
## st_sf(crs = 4326) |> ## set WGS84 (EPSG 4326) as coord. system
## ## add some snapping tolerance by growing points
## ## into circles of 100 m radius:
## st_buffer(100)
## })
##
## ## inspect buffered points:
## spatial_DFs |>
## Reduce(f = bind_rows) |>
## ggplot() + geom_sf(aes(fill = is.na(A_Lat)), alpha = .2)
##
##
##
## ## join spatially:
## with(spatial_DFs,
## st_join(DF1, DF2)
## )
##
## Simple feature collection with 18 features and 6 fields
## Geometry type: POLYGON
## Dimension: XY
## Bounding box: xmin: 5.661099 ymin: 2.442099 xmax: 5.667909 ymax: 2.445907
## Geodetic CRS: WGS 84
## # A tibble: 18 x 7
## A_Lat A_Long A_Speed geometry B_Lat B_Long B_Speed
## * <dbl> <dbl> <int> <POLYGON [°]> <dbl> <dbl> <int>
## 1 5.67 2.44 1345 ((5.666492 2.442246, 5.666502 2.44~ 5.67 2.44 2345
## 2 5.67 2.44 3551 ((5.666142 2.444698, 5.66615 2.444~ 5.67 2.45 4551
## 3 5.67 2.44 3551 ((5.666142 2.444698, 5.66615 2.444~ 5.67 2.45 5551
## 4 5.67 2.44 3551 ((5.666142 2.444698, 5.66615 2.444~ 5.67 2.45 7551
## ## ... |I_O |2023-06-03 12:16:48Z |2755 | 14| 2| 0|
## | 76395522|You can create a unique row number for each unique value of AAA and then cast to wide format.
## library(dplyr)
## library(tidyr)
##
## test.data %>%
## mutate(row = row_number(), .by = AAA) %>%
## pivot_wider(names_from = row, values_from = BBB:DDD, names_vary = "slowest")
##
## # A tibble: 4 × 10
## # AAA BBB_1 CCC_1 DDD_1 BBB_2 CCC_2 DDD_2 BBB_3 CCC_3 DDD_3
## # <chr> <chr> <dbl> <chr> <chr> <dbl> <chr> <chr> <dbl> <chr>
## #1 APIS a 1 Mat a 2 ASG NA NA NA
## #2 PIPIS a NA MNT NA NA NA NA NA NA
## #3 AGAROS b 4 NBEH b 5 DJ c 6 EU
## #4 NOTHING NA 7 DHR NA NA NA NA NA NA
##
## Using data.table -
## library(data.table)
## setDT(test.data)
##
## dcast(test.data[, row := seq_len(.N), AAA],
## AAA ~ row, value.var = c("BBB", "CCC", "DDD")) |Ronak Shah |2023-06-03 10:23:01Z |373296 | 208| 150| 20|
## | 76395254|I suggest compiled is likely the best way to go: ultimately, if you want to be somewhat efficient, then once you find one value that meets your condition it should stop looking (for that higher/lower condition).
## Try this cpp11 implementation. Note that I am no C++ guru, and while I think I've made this safely, please double-check it yourself. Constructive advice appreciated!
## cpp11::cpp_function(r'(
## writable::integers_matrix<> next_index_diff(doubles xs, double lim) {
## int n = xs.size();
## writable::integers_matrix<> mat(n, 2);
## for (int i = 0; i < (n-1); i++) {
## mat(i,0) = n;
## mat(i,1) = n;
## if (xs[i] == NAN) continue;
## for (int j = (i+1); j < n; j++) {
## if (mat(i,1) == n && xs[j] <= (xs[i] - lim)) {
## mat(i,1) = j-i;
## if (mat(i,0) != n) break;
## }
## if (mat(i,0) == n && xs[j] >= (xs[i] + lim)) {
## mat(i,0) = j-i;
## if (mat(i,1) != n) break;
## }
## }
## }
## mat(n-1,0) = n;
## mat(n-1,1) = n;
## return mat;
## })')
##
## Demo in motion:
## next_index_diff(x, 0.5)
## # [,1] [,2]
## # [1,] 1 2
## # [2,] 4 1
## # [3,] 2 1
## # [4,] 1 10
## # [5,] 1 3
## # [6,] 10 1
## # [7,] 10 1
## # [8,] 1 10
## # [9,] 1 10
## # [10,] 10 10
## identical(next_index_diff(x, 0.5), res)
## # [1] TRUE
##
## And a demonstration at scale:
## set.seed(42)
## x <- cumsum(rnorm(1000000))
## head(x)
## # [1] 1.3709584 0.8062603 1.1693887 1.8022513 2.2065196 2.1003951
##
## system.time(res <- next_index_diff(x, 0.2))
## # user system elapsed
## # 4.412 0.000 4.415
##
## head(res)
## # [,1] [,2]
## # [1,] 3 1
## # [2,] 1 37
## # [3,] 1 35
## # [4,] 1 19
## # [5,] 2 17
## # [6,] 1 16
##
## system.time(res <- next_index_diff(x, 0.5))
## # user system elapsed
## # 5.790 0.000 5.791
## head(res)
## # [,1] [,2]
## # [1,] 4 1
## # [2,] 2 37
## # [3,] 1 36
## # [4,] 3 34
## # [5,] 2 18
## # [6,] 1 17
##
## The second batch is with 0.5 instead, and it's not really a surprise that changing from 0.2 to 0.5 would result in an increase in runtime (since it has to go further into the vector each time to find something outside of the limits).
## Thank you for the impetus: I've been intending to do some basic tests with cpp11 for future projects, and this was a good small-bite to break the ice a little. |r2evans |2023-06-03 22:23:26Z |134740 | 142| 75| 6|
## | 5869539|Very short : levels are the input, labels are the output in the factor() function. A factor has only a level attribute, which is set by the labels argument in the factor() function. This is different from the concept of labels in statistical packages like SPSS, and can be confusing in the beginning.
##
## What you do in this line of code
##
## df$f <- factor(df$f, levels=c('a','b','c'),
## labels=c('Treatment A: XYZ','Treatment B: YZX','Treatment C: ZYX'))
##
##
## is telling to R that there is a vector df$f
##
## which you want to transform into a factor,
## in which the different levels are coded as a, b, and c
## and for which you want the levels to be labeled as Treatment A etc.
## The factor function will look for the values a, b and c, convert them to numerical factor classes, and add the label values to the level attribute of the factor. This attribute is used to convert the internal numerical values to the correct labels. But as you see, there is no label attribute.
##
## > df <- data.frame(v=c(1,2,3),f=c('a','b','c'))
## > attributes(df$f)
## $levels
## [1] "a" "b" "c"
##
## $class
## [1] "factor"
##
## > df$f <- factor(df$f, levels=c('a','b','c'),
## + labels=c('Treatment A: XYZ','Treatment B: YZX','Treatment C: ZYX'))
## > attributes(df$f)
## $levels
## [1] "Treatment A: XYZ" "Treatment B: YZX" "Treatment C: ZYX"
##
## $class
## [1] "factor" |Joris Meys |2011-05-03 12:48:48Z |106101 | 262| 218| 31|
## | 5869539|I wrote a package "lfactors" that allows you to refer to either levels or labels.
##
## # packages
## install.packages("lfactors")
## require(lfactors)
##
## flips <- lfactor(c(0,1,1,0,0,1), levels=0:1, labels=c("Tails", "Heads"))
## # Tails can now be referred to as, "Tails" or 0
## # These two lines return the same result
## flips == "Tails"
## #[1] TRUE FALSE FALSE TRUE TRUE FALSE
## flips == 0
## #[1] TRUE FALSE FALSE TRUE TRUE FALSE
##
##
## Note that an lfactor requires that the levels be numeric so that they cannot be confused with the labels. |pdb |2015-05-06 04:15:41Z |1559 | 26| 12| 0|
## | 5869539|Just wanted to share a technique that I generally use for dealing with this issue of using different names for the levels of a factor variable for scripting and pretty printing:
## # Load packages
## library(tidyverse)
## library(sjlabelled)
## library(patchwork)
##
## # Create data frames
## df <- data.frame(v = c(1, 2, 3), f = c("a", "b", "c"))
## df_labelled <- data.frame(v = c(1, 2, 3), f = c("a", "b", "c")) %>%
## val_labels(
## # levels are characters
## f = c(
## "a" = "Treatment A: XYZ", "b" = "Treatment B: YZX",
## "c" = "Treatment C: ZYX"
## ),
## # levels are numeric
## v = c("1" = "Exp. Unit 1", "2" = "Exp. Unit 2", "3" = "Exp. Unit 3")
## )
##
## # df and df_labelled appear exactly the same when printed and nothing changes
## # in terms of scripting
## df
## #> v f
## #> 1 1 a
## #> 2 2 b
## #> 3 3 c
## df_labelled
## #> v f
## #> 1 1 a
## #> 2 2 b
## #> 3 3 c
##
## # Now, let's take a look at the structure of df and df_labelled
## str(df)
## #> 'data.frame': 3 obs. of 2 variables:
## #> $ v: num 1 2 3
## #> $ f: chr "a" "b" "c"
## str(df_labelled) # notice the attributes
## #> 'data.frame': 3 obs. of 2 variables:
## #> $ v: num 1 2 3
## #> ..- attr(*, "labels")= Named num [1:3] 1 2 3
## #> .. ..- attr(*, "names")= chr [1:3] "Exp. Unit 1" "Exp. Unit 2" "Exp. Unit 3"
## #> $ f: chr "a" "b" "c"
## #> ..- attr(*, "labels")= Named chr [1:3] "a" "b" "c"
## #> .. ..- attr(*, "names")= chr [1:3] "Treatment A: XYZ" "Treatment B: YZX" "Treatment C: ZYX"
##
## # Lastly, create ggplots with and without pretty names for factor levels
## p1 <- df_labelled %>% # or, df
## ggplot(aes(x = f, y = v)) +
## geom_point() +
## labs(x = "Treatment", y = "Measurement")
## p2 <- df_labelled %>%
## ggplot(aes(x = to_label(f), y = to_label(v))) +
## geom_point() +
## labs(x = "Treatment", y = "Experimental Unit")
##
## p1 / p2
##
##
## Created on 2021-08-17 by the reprex package (v2.0.0) |Ashirwad |2021-08-17 10:35:53Z |1800 | 13| 12| 0|
## | 76394709|You can do:
## library(tidyverse)
## dw %>%
## pivot_longer(cols = -c(sbj, gender, age),
## names_pattern = '(.*).(1|2)',
## names_to = c('.value', 'var'))
##
## # A tibble: 8 x 6
## sbj gender age var avg sd
## <chr> <int> <int> <chr> <int> <int>
## 1 A 1 22 1 10 6
## 2 A 1 22 2 50 10
## 3 B 1 23 1 12 5
## 4 B 1 23 2 70 11
## 5 C 2 21 1 20 7
## 6 C 2 21 2 20 8
## 7 D 2 20 1 22 8
## 8 D 2 20 2 22 9 |deschen |2023-06-03 07:09:51Z |9606 | 48| 25| 3|
## | 76394485|This is a bit complicated and there are likely more elegant approaches, but one approach using dplyr and tidyr would be to (1) grouping by NAME and CLASS only, first determine consecutive times from row to row by first separating out the start and end times, then (3) calculating the difference in the end in one row and start from another (diff). It should be zero if consecutive, so cumsum will create a new "group" for each non-consecutive time. Then (4) you can create the new time using the start time from the first consecutive row and last time from the last consecutive row, and finally (5) summarize based on the new data:
## library(dplyr)
## library(tidyr)
##
## df %>%
## separate(TIME, sep = 2, into = c("start", "end"), remove = FALSE) %>%
## mutate(diff = as.numeric(end) - as.numeric(lead(start)),
## diff = cumsum(ifelse(is.na(diff), lag(diff), diff)),
## time2 = paste0(substr(TIME[1],1,2), substr(TIME[n()], 3, 4)),
## TIME = case_when(diff == 0 ~ time2, TRUE ~ as.character(TIME)),
## .by = c(NAME, CLASS)) %>%
## summarise(SALARY = sum(SALARY), .by = c(NAME, CLASS, TIME))
##
## Output:
## NAME CLASS TIME SALARY
## 1 ALEX A 1215 600
## 2 BRODI B 1213 100
## 3 BRODI B 1415 200
## 4 BRODI B 1617 300
##
## Note that tidy::separate in this case is now superseded by separate_wider_position, so could switch out the separate(...) line with separate_wider_position(TIME, c(start = 2, end = 2), cols_remove = FALSE) for good practices (but I find the latter less intuitive).
## I would suggest you run this line-by-line to see what's going on. Good luck! |jpsmith |2023-06-03 04:37:22Z |9359 | 33| 14| 5|
## | 76394461|The distHaversine function is vectorized so there is no need to calculate the distances in a loop.
## Just create a data frame of the all the combinations, like you did in your second part and then calculate the distances for all of the rows.
## #all of the pairwise combinations
## grid <- expand.grid(i = seq_len(nrow(df_1)), j = seq_len(nrow(df_2)))
##
## #create master data frame with the pairwise information name & locations
## workingdf <- data.frame(
## name_1 = df_1$name_1[grid$i],
## lon_1 = df_1$lon[grid$i],
## lat_1 = df_1$lat[grid$i],
## name_2 = df_2$name_2[grid$j],
## lon_2 = df_2$lon[grid$j],
## lat_2 = df_2$lat[grid$j]
## )
##
## #calculate the distances for every row, taking the columns as input
## workingdf$distance <- distHaversine(workingdf[ ,c("lon_1", "lat_1")], workingdf[ ,c("lon_2", "lat_2")])
##
## #summarize
## final_summary1 <- aggregate(
## distance ~ name_1,
## data = workingdf,
## FUN = function(x) c(min = min(x), max = max(x), mean = mean(x), median = median(x), sd = sd(x))
## ) |Dave2e |2023-06-03 05:00:34Z |21677 | 48| 41| 18|
## | 76394390|The output of substr should be the pattern of sub.
## string <- "this is a funny cat"
##
## sub(substr(string, 1, 15), "orange", string)
## [1] "orange cat"
##
## Or directly replace the first 15 characters in sub.
## sub("^.{15}", "orange", string)
## [1] "orange cat" |benson23 |2023-06-03 03:14:53Z |15235 | 38| 18| 8|
## | 76394371|You can use tidyr's function pivot_wider here with values_fn and values_fill, which will return a data frame (which I assume you want):
## tidyr::pivot_wider(df, names_from = artist,
## values_from = artist,
## values_fn = list(artist = length),
## values_fill = list(artist = 0))
##
## Output:
## user `betty blowtorch` `die Ärzte` `melissa etheridge` elvenking `juliette & the licks`
## <chr> <int> <int> <int> <int> <int>
## 1 3bd73256-3905-4f3a-97e2-… 1 0 0 0 0
## 2 f2fb0ff0-5679-42ec-a55c-… 0 1 0 0 0
## 3 b3ae82c2-e60b-4551-a76d-… 0 0 1 0 0
## 4 3d6bbeb7-f90e-4d10-b440-… 0 0 0 1 0
## 5 bbd2ffd7-17f4-4506-8572-… 0 0 0 0 1
##
## Or in base R, a simple table may work, but will return a table structure:
## table(df$user, df$artist)
##
## # or for data frame
## as.data.frame.matrix(table(df$user, df$artist))
##
## Output:
## betty blowtorch die Ärzte elvenking juliette & the licks melissa etheridge
## 3bd73256-3905-4f3a-97e2-8b341527f805 1 0 0 0 0
## 3d6bbeb7-f90e-4d10-b440-e153c0d10b53 0 0 1 0 0
## b3ae82c2-e60b-4551-a76d-6620f1b456aa 0 0 0 0 1
## bbd2ffd7-17f4-4506-8572-c1ea58c3f9a8 0 0 0 1 0
## f2fb0ff0-5679-42ec-a55c-15109ce6e320 0 1 0 0 0
##
## Data:
## df <- read.table(text = "user artist
## 1 3bd73256-3905-4f3a-97e2-8b341527f805 'betty blowtorch'
## 2 f2fb0ff0-5679-42ec-a55c-15109ce6e320 'die Ärzte'
## 3 b3ae82c2-e60b-4551-a76d-6620f1b456aa 'melissa etheridge'
## 4 3d6bbeb7-f90e-4d10-b440-e153c0d10b53 elvenking
## 5 bbd2ffd7-17f4-4506-8572-c1ea58c3f9a8 'juliette & the licks'", h = TRUE) |jpsmith |2023-06-03 03:10:52Z |9359 | 33| 14| 5|
## | 76393814|Would it be as simple as adding a +1 and a -1 in the summarize line?
## ex <- have %>%
## group_by(ID) %>%
## summarize(newdate1 = c(start, date2+1), newdate2 = c(date1-1, end)) %>%
## rename(date1 = newdate1, date2 = newdate2) %>%
## filter(date2 > date1) %>%
## ungroup() %>%
## bind_rows(have) %>%
## arrange(ID, date1)
##
## Result:
## # A tibble: 4 × 3
## ID date1 date2
## <dbl> <date> <date>
## 1 1 2000-01-01 2000-01-28
## 2 1 2000-01-29 2000-07-15
## 3 1 2000-07-16 2001-04-30
## 4 1 2001-04-30 2001-07-31
##
## identical(want, ex)
## [1] TRUE |Ricardo Semião e Castro |2023-06-02 23:18:23Z |3983 | 27| 8| 0|
## | 76393742|The problem is that dat.test only provides the original variables that mob() has seen (i.e., Y, D, W, etc.) while survreg() has seen the processed variables y and x.
## The predict() method for mob() trees internally first predicts the node ID (which works smoothly in your example) and then passes on the correct subsets of newdata to the predict() method for the fitted model objects (from survreg() in this case). As the latter does not find the variables y and x in newdata it takes them from the learning data. Hence you get the warnings/errors about the mismatching dimensions.
## So there are two ways to deal with this:
## Make the survreg output believe it was fitted with the formula Surv(Y, D) ~ W + X1 + X2 or
## modify newdata to provide x.
## Strategy 1 is what lmtree() and glmtree() do internally. You have to be careful, though, that everything still works correctly when changing the supposed formula and terms. Hence, it is easier to apply strategy 2 safely, which is what I would recommend here.
## dat.test$x <- model.matrix(~ W + X1 + X2, data = dat.test)
## predict(glmtr, newdata = head(dat.test, 4), type = "quantile", p = 1:9/10)
## ## [,1] [,2] [,3] [,4] [,5] [,6]
## ## 1 0.0044903736 0.019754829 0.049863880 0.10133392 0.18511217 0.32115772
## ## 2 0.0008963665 0.003943451 0.009953807 0.02022824 0.03695202 0.06410937
## ## 3 0.0076736262 0.034940287 0.090110107 0.18616205 0.34486226 0.60601526
## ## 4 0.0014907219 0.006787697 0.017505298 0.03616489 0.06699489 0.11772795
## ## [,7] [,8] [,9]
## ## 1 0.5505803 0.9765134 1.9803484
## ## 2 0.1099066 0.1949312 0.3953163
## ## 3 1.0520106 1.8908381 3.8980339
## ## 4 0.2043695 0.3673249 0.7572541
##
## Caveat: The predict() method for survreg() objects with a multivariate p only returns a matrix if newdata has more than one row. If newdata has just a single row it returns a vector. This confuses the predict() method for mob() if it happens in the first node where predict() is applied because this determines the dimension of the output object. If it happens in subsequent nodes it is no problem. Also, univariate p is never a problem.
## Bonus: Yes, you are using parm as intended. However, note that this only affects the parameter instability tests. Thus, the splitting variables in the tree are selected based on how much the W effect changes along those variables. But for selecting the split point in the variable the full log-likelihood of the model (including all regressors) is maximized. Thus, the split point may be sensitive to changes in all coefficients, not just the one of W. |Achim Zeileis |2023-06-02 23:35:25Z |15438 | 49| 38| 0|
## | 76393686|.Renviron does not accept "R code". You can check what's happening for example with calling readRenviron("~/.Renviron").
## You could follow this route inside .Renviron or just put your code (above) in an .Rprofile file either in your $R_USER or project directory, depending on if you want to have those ENV variables "project specific" or not.
## Details on the startup process can be found in the R manual or for example in the startup package documentation, [startup package] also adds some nice functionality in case you need to handle passwords or other things along the "way" [ie. the startup process]. |GWD |2023-06-03 08:34:14Z |1355 | 22| 10| 130|
## | 76393686|As mentioned, .Renviron does not expect R code but rather shell-like variable assignments. So you could add the following to your .Renviron:
## PATH=~/genetools/ncbi-blast-2.2.29+/bin/:$PATH
##
## You could also adjust your PATH inside .zshrc, but unfortunately RStudio will not see these values! Unless, that is, you are actually starting RStudio from inside your zsh, for instance via open -a RStudio. Only then will it work.
## In general, applications only inherit the shell environment when they are launched from the shell, and .zshrc is only sourced by your zsh. This means that it cannot be used to set global configuration for applications on macOS. In earlier versions of macOS it was possible to configure the launch daemon (which is the background application that is launching all GUI applications on macOS). However, this no longer works because Apple disabled the mechanism.
## Unfortunately, as far as I can tell, there is no way to configure the global environment for GUI applications on macOS any more. All the different hacks that are documented on Stack Overflow fail. The only way is via running launchctl setenv … at every startup. |Konrad Rudolph |2023-06-03 10:13:42Z |526489 | 1208| 930| 0|
## | 76393636|I was able to generate proper ROC curve by adjusting the weights on my rarer outcome response from 5 to 10.
## It seems the finer resolution data requires higher weights for optimum results |Jason Connelly |2023-06-02 22:54:18Z |1 | 0| 0| 0|
## | 76392914|(output <- gsub('"+', '"', gsub('<[^>]+>', "", input)))
## [1] "\"If you do not participate in any school or community activities or clubs, please describe why. (If you do not want to respond to this item, type \"skip\".)\""
##
## To view the results as expected, we just use cat:
## cat(output)
## "If you do not participate in any school or community activities or clubs, please describe why. (If you do not want to respond to this item, type "skip".)" |Onyambu |2023-06-02 19:17:50Z |64533 | 53| 23| 3|
## | 76392893|I haven't found a way to make this work without using formal styling (creating a workbook and worksheet, assigning specific styles to specific rows and columns).
## Here's a working example.
## I've added some comments to explain the arguments in addStyle. If that doesn't help or you have any questions, let me know.
## library(openxlsx)
##
## wb <- createWorkbook() # create structure/workbook
## addWorksheet(wb, "formatting") # create sheet and name it
## # create data to add to workbook
## df1 <- data.frame(values1 = -10:10, values2 = 10:-10, letters = LETTERS[1:21])
##
## writeData(wb, "formatting", df1) # add data to sheet "formatting"
##
## # add styling
## # wb, sheet 1, () for negs, first 2 cols, 100 rows
## addStyle(wb, 1, style = createStyle(numFmt = '#,##0.00_);(#,##0.00)'),
## cols = 1:2, rows = 2:100, gridExpand = T)
## saveWorkbook(wb, "newSpreadsheet.xlsx", overwrite = T) # save formatted wb |Kat |2023-06-02 20:24:18Z |14721 | 51| 18| 3|
## | 76392840|The use of mapply subsumes the ability of shift to see data around the row you're working on.
## Incidentally, I'm replacing the single-& in the if statements with &&, you should never use & there unless it is aggregated, e.g., within sum, any, all, etc. (See Difference between Boolean operators && and & and between || and | in R for a discussion on the differences.)
## One way to approach this is to pass the shifted data as an argument to your function:
## set.seed(42)
## DT <- data.table(V1 = sample(LETTERS[1:3], 20, replace = TRUE),
## V2 = sample(1:5, 20, replace = TRUE))
##
## fun2 <- function(x, y, shifty) {
## if (x == "C" && isTRUE(shifty > y)) {
## return("Greater")
## } else if (x == "C" && isTRUE(shifty < y)) {
## return("Lesser")
## } else{
## return(NA)
## }
## }
##
## DT[, V3 := mapply(fun2, x = V1, y = V2, shifty = shift(V2, type="lead"))]
## # V1 V2 V3
## # <char> <int> <char>
## # 1: A 4 <NA>
## # 2: A 5 <NA>
## # 3: A 5 <NA>
## # 4: A 5 <NA>
## # 5: B 4 <NA>
## # 6: B 2 <NA>
## # 7: B 4 <NA>
## # 8: A 3 <NA>
## # 9: C 2 Lesser
## # 10: C 1 Greater
## # 11: A 2 <NA>
## # 12: A 3 <NA>
## # 13: B 2 <NA>
## # 14: B 4 <NA>
## # 15: B 4 <NA>
## # 16: C 2 Greater
## # 17: C 5 Lesser
## # 18: A 4 <NA>
## # 19: A 5 <NA>
## # 20: C 4 <NA>
## # V1 V2 V3
##
## The use of isTRUE in this fun2 is to account for the condition where shifty will be NA; another way to avoid this is to use shifty=shift(V3, type="lead", fill=0) where 0 is some meaningful number to the context of the data and analysis.
## In case you don't need to use a function, another option is to use fcase:
## DT[, V5 := fcase(
## V1 == "C" & shift(V2, type="lead") > V2, "Greater",
## V1 == "C" & shift(V2, type="lead") < V2, "Lesser" )]
##
## One interesting thing about the use of fcase as opposed to the base if statements: with if, if any of the operands are NA and you don't explicitly account for this, then the condition itself will be NA causing the if statement to fail (see Error in if/while (condition) {: missing Value where TRUE/FALSE needed). This is not true with fcase:
## if (NA == 1) 2 else 3
## # Error in if (NA == 1) 2 else 3 : missing value where TRUE/FALSE needed
## fcase(NA == 1, 2, TRUE, 3)
## # [1] 3
##
## Relatedly, while ifelse does not fail, it also does not necessarily work as we want, whereas fifelse gives us the option to deal explicitly with NA conditions:
## ifelse(NA == 1, 2, 3)
## # [1] NA
## fifelse(NA == 1, 2, 3)
## # [1] NA
## fifelse(NA == 1, 2, 3, 4)
## # [1] 4
##
## This is suggested by the formals (and documented in the docs):
## formals(ifelse)
## # $test
## # $yes
## # $no
## formals(fifelse)
## # $test
## # $yes
## # $no
## # $na
## # [1] NA |r2evans |2023-06-02 19:16:48Z |134740 | 142| 75| 6|
## | 76392840|The main issue,on top of the NA issue that you rightly pointed out is that the if statement requires condition of length one
## a <- 1:10
## > if(a>5)
## + { print("hello")}
## Error in if (a > 5) { : the condition has length > 1
##
## And the fun1 is passing a vector -
## update fun1 is not passing a vector see comment below.
## In my opinion your second option is the right way to do the job.
## DT.v2 <- DT[V1 == "C" & shift(V2, type = "lead") > V2, V3 := "Greater"][
## V1 == "C" & shift(V2, type = "lead") < V2, V3 := "Lesser"]
##
##
## This is less error prone. If you really need to use if else statement I would suggest checking ?fcase. |DJJ |2023-06-02 19:19:02Z |2469 | 52| 28| 2|
## | 76392725|I suggest you use labels = scales::label_number(scale_cut = scales::cut_short_scale()) to allow it to determine (based on observed data) which to use. (No if_else required.)
## df <- ChickWeight
## lumpy <- (ChickWeight$weight * 10)
## df %>%
## ggplot(., aes(x = `Time`, y = lumpy)) + geom_point() +
## scale_y_continuous(name = NULL, labels = scales::label_number(scale_cut = scales::cut_short_scale()))
##
##
## Same plot code, but weight is a bit larger:
## df <- ChickWeight
## lumpy <- (ChickWeight$weight * 10000) # BIGGER
## df %>%
## ggplot(., aes(x = `Time`, y = lumpy)) + geom_point() +
## scale_y_continuous(name = NULL, labels = scales::label_number(scale_cut = scales::cut_short_scale())) |r2evans |2023-06-02 18:52:19Z |134740 | 142| 75| 6|
## | 76392631|Probably you can try this base R option (the column names might be a bit different from the desired output)
## > aggregate(. ~ number_of_degrees, df, \(x) proportions(table(x)))
## number_of_degrees ethnicity.a ethnicity.b ethnicity.c
## 1 1 0.3333333 0.3666667 0.3000000
## 2 2 0.3157895 0.2105263 0.4736842
## 3 3 0.3437500 0.4062500 0.2500000
##
## or
## reshape(
## as.data.frame(proportions(table(df), 2)),
## direction = "wide",
## idvar = "number_of_degrees",
## timevar = "ethnicity"
## )
##
## which gives
## number_of_degrees Freq.a Freq.b Freq.c
## 1 1 0.3333333 0.3666667 0.3000000
## 4 2 0.3157895 0.2105263 0.4736842
## 7 3 0.3437500 0.4062500 0.2500000
##
## Or, a less compact option with dplyr (sorry for my limited tidyverse knowledge)
## table(rev(df)) %>%
## proportions(1) %>%
## as.data.frame.matrix() %>%
## rownames_to_column(var = "number_of_degrees") %>%
## mutate(number_of_degrees = as.integer(number_of_degrees))
##
## which gives
## number_of_degrees a b c
## 1 1 0.3333333 0.3666667 0.3000000
## 2 2 0.3157895 0.2105263 0.4736842
## 3 3 0.3437500 0.4062500 0.2500000 |ThomasIsCoding |2023-06-02 18:34:18Z |92599 | 78| 23| 8|
## | 76392631|This is one option:
## df |>
## summarise(n = n(),.by = c(number_of_degrees,ethnicity)) |>
## mutate(pct = n / sum(n),.by = number_of_degrees) |>
## select(-n) |>
## pivot_wider(names_from = ethnicity,
## values_from = pct,
## names_prefix = 'percent_',
## names_sort = TRUE) |joran |2023-06-02 18:34:10Z |169077 | 466| 429| 32|
## | 76392631|Here is a way. It is not more compact but doesn't hard code ethnicity values.
## suppressPackageStartupMessages({
## library(dplyr)
## library(tidyr)
## })
##
## df %>%
## count(ethnicity, number_of_degrees) %>%
## group_by(number_of_degrees) %>%
## mutate(n = 100*prop.table(n)) %>%
## pivot_wider(number_of_degrees,
## names_from = ethnicity,
## names_glue = "percent_{ethnicity}",
## values_from = n)
## #> # A tibble: 3 × 4
## #> # Groups: number_of_degrees [3]
## #> number_of_degrees percent_a percent_b percent_c
## #> <int> <dbl> <dbl> <dbl>
## #> 1 1 33.3 36.7 30
## #> 2 2 31.6 21.1 47.4
## #> 3 3 34.4 40.6 25
##
## Created on 2023-06-02 with reprex v2.0.2 |Rui Barradas |2023-06-02 18:51:48Z |68610 | 63| 32| 8|
## | 76392631|using mosaic::percs for percentages. You can use counts(..., format = 'percent')
## mosaic::percs(ethnicity~number_of_degrees, df)[-1]
##
## number_of_degrees perc_a perc_b perc_c
## 1 1 33.33333 36.66667 30.00000
## 2 2 31.57895 21.05263 47.36842
## 3 3 34.37500 40.62500 25.00000 |Onyambu |2023-06-02 18:58:00Z |64533 | 53| 23| 3|
## | 76392631|Here is a one-liner to get a table out using this data. No packages are used.
## 100*proportions(table(df[2:1]), 1)
##
## giving:
## ethnicity
## number_of_degrees a b c
## 1 33.33333 36.66667 30.00000
## 2 31.57895 21.05263 47.36842
## 3 34.37500 40.62500 25.00000
##
## You could consider adding a margin to make it clear that the rows add to 100.
## addmargins(100*proportions(table(df[2:1]), 1), 2)
##
## giving:
## ethnicity
## number_of_degrees a b c Sum
## 1 33.33333 36.66667 30.00000 100.00000
## 2 31.57895 21.05263 47.36842 100.00000
## 3 34.37500 40.62500 25.00000 100.00000 |G. Grothendieck |2023-06-02 19:53:02Z |251923 | 333| 198| 17|
## | 76392610|(?i) starts case-insensitive mode
## "strawberries" will be replaced by "strawberry" regardless of the case of the characters.
## library(dplyr)
## library(stringr)
##
## df %>%
## mutate(fruit = str_replace_all(fruit, "(?i)strawberries", "strawberry"))
##
## farmer fruit number
## 1 Sally strawberry 32
## 2 Joe strawberry 12
## 3 Bob pear 42
##
## data:
## df <- structure(list(farmer = c("Sally", "Joe", "Bob"), fruit = c("Strawberries",
## "strawberry", "pear"), number = c(32L, 12L, 42L)), class = "data.frame", row.names = c("1",
## "2", "3")) |TarJae |2023-06-02 18:21:27Z |70408 | 62| 18| 6|
## | 76392610|welcome to stackoverflow.
## We don't in general say hello and thanks but for new comers it might be nice, to get you on board. Asking for help and learning to do so is a great in programming.
## Please consider reading creating reproducible example in R. This will help us help you. Here a small data set at TarJae provided would make thing easier for anyone who wants to help.
## In fact you are right from the begining, but I suspect that you confused df and df1
## df <- structure(list(farmer = c("Sally", "Joe", "Bob"), fruit =
## c("strawberries", "strawberry", "pear"), number = c(32L, 12L, 42L)),
## class = "data.frame",
## row.names = c("1", "2", "3"))
##
##
## ## df
## ## farmer fruit number
## ## 1 Sally strawberries 32
## ## 2 Joe strawberry 12
## ## 3 Bob pear 42
##
## If I apply your very own command but don't assign it to df1 I have what you expect.
## df %>% mutate(fruit = str_replace(fruit, "strawberries", "strawberry"))
##
## of course there is nothing wrong to assigning it to df1
## df1 <- df %>% mutate(fruit = str_replace(fruit, "strawberries", "strawberry"))
##
## but then we need to check df1
## ## df1
## ## farmer fruit number
## ## 1 Sally strawberry 32
## ## 2 Joe strawberry 12
## ## 3 Bob pear 42
##
## This is because the mutatefunction makes a copy of the table df, so the table df is left unchanged. Indeed, in my opinion mutate here is a poor choice of name |DJJ |2023-06-02 19:00:32Z |2469 | 52| 28| 2|
## | 76392409|one approach would be to
## first add a (not yet used) level "A-C" to the factor "cluster"
## add a geom_histogram with its own dataset, which is the original data with cluster set to "A-C" for all observations
## Example:
## d <- data.frame(cluster = gl(3, 100, labels = c(LETTERS[1:3], 'A-C')),
## value = rnorm(300)
## )
##
## d |>
## ggplot(aes(value, fill = cluster)) +
## geom_histogram() +
## geom_histogram(data = d |> mutate(cluster = gl(1, 300, labels = c('A-C')))) +
## facet_wrap(~ cluster, ncol = 1) +
## scale_fill_discrete(breaks = c(LETTERS[1:3], 'A-C'),
## name = 'redundant use\nof colour'
## ) |I_O |2023-06-03 15:34:32Z |2755 | 14| 2| 0|
## | 76392367|skip=n is only for read_excel()
## in the googlesheets4 library to start at a specified range, you'd use range_write(). This is similar to the startRow=n in the xlsx library.
## range_write(ss = "google_sheet_url", data = df, range = "B1", sheet = "test") |stefan_aus_hannover |2023-06-02 18:09:36Z |1553 | 13| 11| 0|
## | 76392347|There's not really any such thing as a blank entry in an R data frame; it might be NA (the most likely possibility) or an empty string "" in a character/factor column (but it should be an NA, unless you want "blank" to be an additional category). By default, R will discard rows of a data frame with NA values, if the NA-valued columns are included in a model ("complete case analysis").
## So, you should almost certainly use a tool like zoo::na.locf() (locf = "last observation carried forward") or tidyr::fill() to replace those blanks with the appropriate values.
## At which level the values of a predictor variable vary matters only at two steps of the analysis.
## deciding how to structure the model: in general, you should include random effects for all predictors that vary within groups, i.e. if x varies within group g then your model should allow for the possibility that the effects of x (i.e. slopes of y with respect to x) vary across groups, see e.g. Schielzeth and Forstmeier 2009.
## inference after fitting the model: if you are using a framework that allows you to use finite-size corrections (typically by using an F-test), then the denominator degrees of freedom should be adjusted based on the levels of variation.
## simulate some data
## For illustration, I'm going to make up an example with a continuous and a categorical predictor that vary across groups (age and ttt), and a continuous and a categorical predictor (x and t2) that vary within groups.
## library(nlme)
## library(lmerTest)
##
## set.seed(101)
## ss <- data.frame(id = factor(1:20),
## age = seq(10, 20, length.out = 20),
## ttt = rep(c("c", "t"), 10))
##
## dd <- data.frame(id = rep(factor(1:20), each = 10),
## x = rnorm(200),
## t2 = factor(sample(c("a", "b"), size = 200,
## replace = TRUE))
## )
## dd <- merge(dd, ss, by = "id")
## dd$y <- simulate(~ x + age + ttt + t2 + (x + t2 |id),
## family = gaussian,
## newdata = dd,
## newparams = list(beta = c(0, 4, 1, 2, 3),
## ## 3 x 3 cov matrix, need 6 params
## theta = rep(1, 6),
## sigma = 1))[[1]]
##
## fit models
## Allowing the effects of x and t2 to vary across levels of id:
## m_lme <- lme(y ~ x + age + ttt + t2, random = ~ x + t2 | id,
## data = dd)
## m_lmer <- lmer(y ~ x + age + ttt + t2 + (x + t2 | id),
## data = dd)
##
## inference
## anova(m_lme)
## numDF denDF F-value p-value
## (Intercept) 1 178 11848.141 <.0001
## x 1 178 25.549 <.0001
## age 1 17 488.387 <.0001
## ttt 1 17 54.349 <.0001
## t2 1 178 78.693 <.0001
##
## The denDF column shows what lme "thinks" is the level of replication for each predictor. The algorithm that lme uses (which involves counting number of parameters and number of observations at each level) gets the right answer for age and ttt, but it gets the wrong answer for x and t2 (since these are allowed to vary across groups, the correct inference should be based on the number of groups, not the number of observations ... This problem is [noted in the GLMM FAQ](GLMM FAQ), and I link to some code that does a slightly better job.)
## In contrast, the lmerTest package uses more sophisticated approximation algorithms (lme4 doesn't give p-values at all), which give approximately correct answers. (In this case we know the correct denominator df for all four predictors is 17; the range from 16-20 should make little difference in practice ...)
## anova(m_lmer)
## Type III Analysis of Variance Table with Satterthwaite's method
## Sum Sq Mean Sq NumDF DenDF F value Pr(>F)
## x 99.12 99.12 1 18.939 110.983 2.338e-09 ***
## age 411.70 411.70 1 19.803 460.956 3.475e-15 ***
## ttt 44.57 44.57 1 16.846 49.898 2.003e-06 ***
## t2 70.29 70.29 1 18.846 78.697 3.736e-08 ***
##
## The Kenward-Roger method is more computationally demanding but usually more accurate (in this case it gives similar answers):
## anova(m_lmer, ddf = "Kenward-Roger")
## Type III Analysis of Variance Table with Kenward-Roger's method
## Sum Sq Mean Sq NumDF DenDF F value Pr(>F)
## x 98.80 98.80 1 18.958 110.621 2.375e-09 ***
## age 327.72 327.72 1 19.252 366.937 5.345e-14 ***
## ttt 35.21 35.21 1 16.376 39.425 9.879e-06 ***
## t2 69.71 69.71 1 18.908 78.047 3.876e-08 ***
##
## Schielzeth, Holger, and Wolfgang Forstmeier. “Conclusions beyond Support: Overconfident Estimates in Mixed Models.” Behavioral Ecology 20, no. 2 (March 1, 2009): 416–20. https://doi.org/10.1093/beheco/arn145. |Ben Bolker |2023-06-03 17:57:37Z |208153 | 451| 368| 25|
## | 76392259|I couldn't think of a more straightforward approach, but this achieves what you want using dplyr and tidyr. The workflow is:
## Create 'dummy' variable "APPEARED" with all == 1. This is to ensure only one instance of "APPEARANCE" is created by slice(rep())
## Pivot to long form and create a new row for each time unit
## Create "time_var" column to hold "APPEARANCE" value and sequential value for time units in existence
## Filter any "DISAPPEARANCE" values < "APPEARANCE" values
## I wasn't sure how you wanted to handle the time units that are neither the first nor last time unit for each group so I've arbitrarily identified them as "EXISTED".
## library(dplyr)
## library(tidyr)
##
## # Sample data
## df <- read.table(text = "SPECIES GROUP APPEARANCE DISAPPEARANCE PC1 PC2
## Alpha A 1 2 -0.100119669 0.0595756
## Beta B 1 2 0.217854357 -0.010260947
## Gamma B 1 5 0.193998962 -0.029757206
## Delta B 2 3 0.126089613 0.014409345
## Epsilon D 3 4 -0.094182811 -0.004406088", header = TRUE)
##
## test <- df %>%
## mutate(APPEARED = 1) %>%
## pivot_longer(cols = -c(SPECIES, GROUP, APPEARANCE, PC1, PC2),
## names_to = "status") %>%
## group_by(SPECIES, status) %>%
## slice(rep(1:n(), each = value)) %>%
## mutate(time_var = 1:n(),
## time_var = ifelse(status == "APPEARED", APPEARANCE, time_var)) %>%
## group_by(SPECIES) %>%
## mutate(tmp = first(time_var)) %>%
## filter(time_var > tmp | status == "APPEARED") %>%
## mutate(status = ifelse(status == "DISAPPEARANCE" & time_var == max(time_var),
## "DISAPPEARED",
## ifelse(status == "APPEARED", 'APPEARED', "EXISTED"))) %>%
## select(-c(APPEARANCE, value, tmp)) %>%
## ungroup()
##
## data.frame(test)
## SPECIES GROUP PC1 PC2 status time_var
## 1 Alpha A -0.10011967 0.059575600 APPEARED 1
## 2 Alpha A -0.10011967 0.059575600 DISAPPEARED 2
## 3 Beta B 0.21785436 -0.010260947 APPEARED 1
## 4 Beta B 0.21785436 -0.010260947 DISAPPEARED 2
## 5 Delta B 0.12608961 0.014409345 APPEARED 2
## 6 Delta B 0.12608961 0.014409345 DISAPPEARED 3
## 7 Epsilon D -0.09418281 -0.004406088 APPEARED 3
## 8 Epsilon D -0.09418281 -0.004406088 DISAPPEARED 4
## 9 Gamma B 0.19399896 -0.029757206 APPEARED 1
## 10 Gamma B 0.19399896 -0.029757206 EXISTED 2
## 11 Gamma B 0.19399896 -0.029757206 EXISTED 3
## 12 Gamma B 0.19399896 -0.029757206 EXISTED 4
## 13 Gamma B 0.19399896 -0.029757206 DISAPPEARED 5 |Leroy Tyrone |2023-06-03 01:28:18Z |1206 | 23| 14| 0|
## | 76392228|Thanks to the very helpful comments I now understood that this is not an R problem, but rather a statistical one.
## If the interaction term includes a factor, the estimates show the effects of factor levels in comparison to a reference level. In the estimates I posted above, cc4 is the reference level. This means that e.g. students in classrooms with climate 3 (cc3) have an expected value of -0.66 compared to students in a classroom with climate 4 (cc4).
## Again a big thank you to all the contributors of the comments that helped me to understand this! |Svenja |2023-06-02 18:18:03Z |11 | 0| 0| 0|
## | 76392177|base R:
## # Example data frame:
## df <- data.frame(group = c(100, 100, 100, 200, 200, 200, 300, 300, 300))
##
## df$number <- ave(df$group, df$group, FUN = seq_along)
##
## dplyr:
## library(dplyr) # > 1.1.0
## df %>%
## mutate(number = row_number(), .by=group)
##
## group number
## 1 100 1
## 2 100 2
## 3 100 3
## 4 200 1
## 5 200 2
## 6 200 3
## 7 300 1
## 8 300 2
## 9 300 3 |TarJae |2023-06-02 17:14:37Z |70408 | 62| 18| 6|
## | 76392149|Some suspicious stuff, like the "number-of" variables which should probably instead be assignment vectors where the captain is not considered a player. The following succeeds but I have not translated it back to R.
## import pandas as pd
## import pulp
##
## df = pd.DataFrame(
## {
## 'Points': (
## 62.8, 48.8, 45.8, 41.8, 35.3, 30.3, 29.3,
## 23.8, 23.3, 22.3, 21.8, 19, 16.3, 8.5, 6.8,
## ),
## 'Cost': (
## 14000, 13400, 10800, 9200, 7600, 6600, 7400,
## 5600, 5800, 5200, 6200, 4800, 4200, 2200, 1400,
## ),
## },
## index=pd.Index(name='Name', data=(
## "Jokic", "Butler", "Murray", "Adebayo",
## "Porter", "Gordon", "Martin", "Pope", "Vincent", "Lowry", "Brown",
## "Strus", "Robinson", "Green", "Highsmith",
## ))
## )
##
##
## def make_vars(row: pd.Series) -> pd.Series:
## is_captain = pulp.LpVariable(name=f'{row.name}_captain', cat=pulp.LpBinary)
## is_player = pulp.LpVariable(name=f'{row.name}_player', cat=pulp.LpBinary)
##
## # person cannot be both a player and a captain; make these mutually exclusive
## prob.addConstraint(
## name=f'{row.name}_excl', constraint=is_player + is_captain <= 1,
## )
##
## return pd.Series(
## index=('Captain', 'Player'),
## data=(is_captain, is_player),
## )
##
##
## prob = pulp.LpProblem(name='nba_team', sense=pulp.LpMaximize)
## df = pd.concat((df, df.apply(make_vars, axis=1)), axis=1)
##
## # the total cost is the dot product of costs with both the captain and player assignment variables
## captain_coef = 1.5
## total_cost = captain_coef * df.Captain.dot(df.Cost) + df.Player.dot(df.Cost)
##
## prob.addConstraint(name='total_cost', constraint=total_cost <= 50_000)
## # there are exactly four non-captain players
## prob.addConstraint(name='n_players', constraint=df.Player.sum() == 4)
## # there is exactly one captain
## prob.addConstraint(name='n_captains', constraint=df.Captain.sum() == 1)
##
## # maximize the number of points for the roster, with more weight to the captain
## prob.objective = captain_coef * df.Captain.dot(df.Points) + df.Player.dot(df.Points)
##
## print(prob)
## prob.solve()
## assert prob.status == pulp.LpStatusOptimal
##
## captains = df.loc[
## df.Captain.apply(pulp.LpVariable.value) > 0.5,
## ['Points', 'Cost']
## ]
## players = df.loc[
## df.Player.apply(pulp.LpVariable.value) > 0.5,
## ['Points', 'Cost']
## ]
## print(f'{prob.objective.value():.1f} points for ${total_cost.value():,.2f}')
## print()
## print('Captain:')
## print(captains)
## print()
## print('Players:')
## print(players)
##
## nba_team:
## MAXIMIZE
## 62.699999999999996*Adebayo_captain + 41.8*Adebayo_player + 32.7*Brown_captain + 21.8*Brown_player + 73.19999999999999*Butler_captain + 48.8*Butler_player + 45.45*Gordon_captain + 30.3*Gordon_player + 12.75*Green_captain + 8.5*Green_player + 10.2*Highsmith_captain + 6.8*Highsmith_player + 94.19999999999999*Jokic_captain + 62.8*Jokic_player + 33.45*Lowry_captain + 22.3*Lowry_player + 43.95*Martin_captain + 29.3*Martin_player + 68.69999999999999*Murray_captain + 45.8*Murray_player + 35.7*Pope_captain + 23.8*Pope_player + 52.949999999999996*Porter_captain + 35.3*Porter_player + 24.450000000000003*Robinson_captain + 16.3*Robinson_player + 28.5*Strus_captain + 19.0*Strus_player + 34.95*Vincent_captain + 23.3*Vincent_player + 0.0
## SUBJECT TO
## Jokic_excl: Jokic_captain + Jokic_player <= 1
##
## Butler_excl: Butler_captain + Butler_player <= 1
##
## Murray_excl: Murray_captain + Murray_player <= 1
##
## Adebayo_excl: Adebayo_captain + Adebayo_player <= 1
##
## Porter_excl: Porter_captain + Porter_player <= 1
##
## Gordon_excl: Gordon_captain + Gordon_player <= 1
##
## Martin_excl: Martin_captain + Martin_player <= 1
##
## Pope_excl: Pope_captain + Pope_player <= 1
##
## Vincent_excl: Vincent_captain + Vincent_player <= 1
##
## Lowry_excl: Lowry_captain + Lowry_player <= 1
##
## Brown_excl: Brown_captain + Brown_player <= 1
##
## Strus_excl: Strus_captain + Strus_player <= 1
##
## Robinson_excl: Robinson_captain + Robinson_player <= 1
##
## Green_excl: Green_captain + Green_player <= 1
##
## Highsmith_excl: Highsmith_captain + Highsmith_player <= 1
##
## total_cost: 13800 Adebayo_captain + 9200 Adebayo_player + 9300 Brown_captain
## + 6200 Brown_player + 20100 Butler_captain + 13400 Butler_player
## + 9900 Gordon_captain + 6600 Gordon_player + 3300 Green_captain
## + 2200 Green_player + 2100 Highsmith_captain + 1400 Highsmith_player
## + 21000 Jokic_captain + 14000 Jokic_player + 7800 Lowry_captain
## + 5200 Lowry_player + 11100 Martin_captain + 7400 Martin_player
## + 16200 Murray_captain + 10800 Murray_player + 8400 Pope_captain
## + 5600 Pope_player + 11400 Porter_captain + 7600 Porter_player
## + 6300 Robinson_captain + 4200 Robinson_player + 7200 Strus_captain
## + 4800 Strus_player + 8700 Vincent_captain + 5800 Vincent_player <= 50000
##
## n_players: Adebayo_player + Brown_player + Butler_player + Gordon_player
## + Green_player + Highsmith_player + Jokic_player + Lowry_player
## + Martin_player + Murray_player + Pope_player + Porter_player
## + Robinson_player + Strus_player + Vincent_player = 4
##
## n_captains: Adebayo_captain + Brown_captain + Butler_captain + Gordon_captain
## + Green_captain + Highsmith_captain + Jokic_captain + Lowry_captain
## + Martin_captain + Murray_captain + Pope_captain + Porter_captain
## + Robinson_captain + Strus_captain + Vincent_captain = 1
##
## VARIABLES
## 0 <= Adebayo_captain <= 1 Integer
## 0 <= Adebayo_player <= 1 Integer
## 0 <= Brown_captain <= 1 Integer
## 0 <= Brown_player <= 1 Integer
## 0 <= Butler_captain <= 1 Integer
## 0 <= Butler_player <= 1 Integer
## 0 <= Gordon_captain <= 1 Integer
## 0 <= Gordon_player <= 1 Integer
## 0 <= Green_captain <= 1 Integer
## 0 <= Green_player <= 1 Integer
## 0 <= Highsmith_captain <= 1 Integer
## 0 <= Highsmith_player <= 1 Integer
## 0 <= Jokic_captain <= 1 Integer
## 0 <= Jokic_player <= 1 Integer
## 0 <= Lowry_captain <= 1 Integer
## 0 <= Lowry_player <= 1 Integer
## 0 <= Martin_captain <= 1 Integer
## 0 <= Martin_player <= 1 Integer
## 0 <= Murray_captain <= 1 Integer
## 0 <= Murray_player <= 1 Integer
## 0 <= Pope_captain <= 1 Integer
## 0 <= Pope_player <= 1 Integer
## 0 <= Porter_captain <= 1 Integer
## 0 <= Porter_player <= 1 Integer
## 0 <= Robinson_captain <= 1 Integer
## 0 <= Robinson_player <= 1 Integer
## 0 <= Strus_captain <= 1 Integer
## 0 <= Strus_player <= 1 Integer
## 0 <= Vincent_captain <= 1 Integer
## 0 <= Vincent_player <= 1 Integer
##
##
## Result - Optimal solution found
##
## Objective value: 225.40000000
## Enumerated nodes: 0
## Total iterations: 0
## Time (CPU seconds): 0.03
## Time (Wallclock seconds): 0.04
##
## Option for printingOptions changed from normal to all
## Total time (CPU seconds): 0.04 (Wallclock seconds): 0.04
##
## 225.4 points for $50,000.00
##
## Captain:
## Points Cost
## Name
## Jokic 62.8 14000
##
## Players:
## Points Cost
## Name
## Adebayo 41.8 9200
## Porter 35.3 7600
## Gordon 30.3 6600
## Pope 23.8 5600
##
## If it turns out that you really do want six players, then change the 4 to 5 and this produces
## 224.7 points for $50,000.00
##
## Captain:
## Points Cost
## Name
## Jokic 62.8 14000
##
## Players:
## Points Cost
## Name
## Adebayo 41.8 9200
## Porter 35.3 7600
## Gordon 30.3 6600
## Robinson 16.3 4200
## Highsmith 6.8 1400 |Reinderien |2023-06-02 22:16:34Z |9990 | 74| 49| 5|
## | 76392055|You can use gregexpr and regmatches with pat1.*?pat2 or in case they should be on a word boundary with \\bpat1.*?pat2\\b.
## regmatches(string, gregexpr("pat1.*?pat2", string))[[1]]
## #[1] "pat1 hello333\n pat2" "pat1 again pat2"
##
## regmatches(string, gregexpr("\\bpat1.*?pat2\\b", string))[[1]]
## #[1] "pat1 hello333\n pat2" "pat1 again pat2" |GKi |2023-06-02 18:58:00Z |35830 | 48| 26| 2|
## | 76391960|As in other dpylr verbs you could assign values to dynamically created LHS names by using the walrus operator := and !!sym(col) or glue syntax "{col}".
## Using a minimal reproducible example based on mtcars:
## library(dplyr, warn=FALSE)
##
## col <- "cyl"
## mtcars |>
## head() |>
## add_row(cyl = 1) |>
## add_row("{col}" := 2) |>
## add_row(!!sym(col) := 3)
## #> mpg cyl disp hp drat wt qsec vs am gear carb
## #> Mazda RX4 21.0 6 160 110 3.90 2.620 16.46 0 1 4 4
## #> Mazda RX4 Wag 21.0 6 160 110 3.90 2.875 17.02 0 1 4 4
## #> Datsun 710 22.8 4 108 93 3.85 2.320 18.61 1 1 4 1
## #> Hornet 4 Drive 21.4 6 258 110 3.08 3.215 19.44 1 0 3 1
## #> Hornet Sportabout 18.7 8 360 175 3.15 3.440 17.02 0 0 3 2
## #> Valiant 18.1 6 225 105 2.76 3.460 20.22 1 0 3 1
## #> ...7 NA 1 NA NA NA NA NA NA NA NA NA
## #> ...8 NA 2 NA NA NA NA NA NA NA NA NA
## #> ...9 NA 3 NA NA NA NA NA NA NA NA NA |stefan |2023-06-02 16:43:50Z |84193 | 50| 25| 6|
## | 76391906|There are likely quite a few ways to accomplish this, but one thing you could do is this:
## min_date <- df |> pull({{date}}) |> min()
##
## (Minor point: I find it a little confusing to read the function when you name the arguments using the same name as the values you're likely to pass in. If it were me I'd use more generic argument names like function(df, var1, var2) or something.) |joran |2023-06-02 16:25:04Z |169077 | 466| 429| 32|
## | 76391906|We can use as_label former quo_name from rlang package: https://rlang.r-lib.org/reference/quo_label.html
## These functions take an arbitrary R object, typically an expression, and represent it as a string.
## Using this example data frame:
## dat <- data.frame(
## date = c("2023-06-01", "2023-06-02", "2023-06-03"),
## shots = c(8, 12, 10),
## passes = c(340, 410, 390)
## )
##
## you want to bring this working ggplot
## library(ggplot2)
##
## dat %>%
## ggplot(aes(x = date, y = shots))+
## geom_col()+
## ggtitle(paste("Shot Totals from", min(dat$date), "-", max(dat$date))
## )
##
## to a custom function:
## library(ggplot2)
## library(dplyr)
## library(rlang)
##
## cust_ggplot <- function(df, x, y) {
## x <- enquo(x) # Quote the 'x' argument
## y <- enquo(y) # Quote the 'y' argument
##
## ggplot(df, aes(x = !!x, y = !!y)) +
## geom_col() +
## ggtitle(paste("Shot Totals from", min(df[[as_label(x)]]), "-", max(df[[as_label(x)]])))
## }
##
## cust_ggplot(dat, date, shots) |TarJae |2023-06-02 16:42:58Z |70408 | 62| 18| 6|
## | 76391852|Using your data, I believe this does what you want:
## h <- hist(my_vector)
##
## ggplot(data = data.frame(x = my_vector),aes(x = x)) +
## stat_bin(geom = 'bar',breaks = h$breaks) |joran |2023-06-02 16:38:00Z |169077 | 466| 429| 32|
## | 76391751|I don't follow the calculations to get what you have as the sample output, but based on your description:
## library(dplyr)
##
## df_100 <- data.frame(siteid= seq(1,5,1),conflu=c(3,2,4,5,6),diflu=c(9,2,30,2,5))
##
## df_full <- data.frame(siteid = seq(1,10,2),conflu=c(6,3,5,2,3),diflu=c(5,9,2,30,7))
##
## df_difference <- df_100 |>
## inner_join(df_full, by = "siteid", suffix = c("_100", "_full")) |>
## mutate(
## diff_con = conflu_full - conflu_100,
## diff_dif = diflu_full - diflu_100
## ) |>
## select(siteid, diff_con, diff_dif)
##
## inner_join will match and keep only the rows with same "siteid". Then use mutate to do the calculations and select the columns you want. |Marcus |2023-06-02 16:40:54Z |3275 | 15| 7| 0|
## | 76391552|With the modified data containing multiple ELSEIF\n see if strsplit works. It splits the line into several rows. Then detect criteria ) and paste together the desired string.
## library(dplyr)
##
## data.frame(text = unlist(strsplit(mystring2, "ELSEIF\n"))) %>%
## filter(grepl("criteria \\)", text)) %>%
## mutate(text = gsub("(criteria \\)).*", "\\1", paste0("ELSEIF\n", text))) |Andre Wildberg |2023-06-02 16:09:17Z |11033 | 29| 12| 3|
## | 76391552|In tidyverse:
## str_extract(
## mystring,
## regex(
## "ELSEIF\n +\\(\n.+\\)\\) criteria \\)",
## dotall=TRUE
## )
## )
##
## See https://stringr.tidyverse.org/reference/modifiers.html |fm361 |2023-06-02 16:15:13Z |16 | 2| 0| 0|
## | 76391482|the labeller argument to facet_wrap might come in handy, if you set it to "label_parsed". Example:
## d <- data.frame(x = rnorm(2),
## y = rnorm(2),
## ID = c(paste0('g~', 1:2),
## paste0('g~nabla~', 1:2)
## )
## )
##
##
## d |>
## ggplot(aes(x, y)) +
## geom_point() +
## facet_wrap(. ~ ID,
## labeller = label_parsed,
## nrow = 2
## ) |I_O |2023-06-02 18:30:08Z |2755 | 14| 2| 0|
## | 76391151|A full join will keep the values of both tables
## library(dplyr)
## full_join(total_number_views_ndinstict, total_number_views_unique_na) |G. Grothendieck |2023-06-02 14:53:42Z |251923 | 333| 198| 17|
## | 76391100|I assume that you have used
## group_by(Name) |>.... that would require to use ungroup() before you use cumsum() otherwise you calculate the cumulative sum within each group rather than an overall cummulative sum. |Beate |2023-06-02 15:24:11Z |31 | 4| 0| 0|
## | 76391093|As there is a temporal element, I guess your first step would be to create a line plot where you have time on x axis and number of mature on y. And you will have a line for each treatment. You might want to consider a time series analysis if interested in all time points. Depending on your experiment an idea could be of just looking at the last time of observation; you can create a new variable which is the difference of mature at t_final - mature at t_start. This new variable could be your dependent variable for a linear regression. The predictor would be the categorical variable of treatment group (use control group as baseline for the categorical comparison).
## I don't think you want to do separate regressions, as you are trying to estimate the effect of treatment, so that is your independent variable. |Mauro r |2023-06-02 14:58:28Z |15 | 4| 0| 0|
## | 76390501|With {vroom} you can conveniently read in text files into one combined dataframe like so:
## library(vroom)
## vroom(list.files('path/to/your/untarred/files', full.names = TRUE),
## id = 'source' ## optional: keep file names in column 'source'
## ) |I_O |2023-06-02 15:47:11Z |2755 | 14| 2| 0|
## | 76390426|library(ggplot2)
## #create my dataset
## a<-c("4000","7500","4000","102000","14500","150000")
## b<-c("A","A","A","B","B","B")
## data<-data.frame(cbind(a,b))
## data$b<-as.factor(data$b)
## data$a<-as.integer(data$a)
## #plot the data, with scientific annotation
## scientific_10 <- function(x) { format(as.numeric(x), scientific = TRUE) }
## A<-ggplot(data, aes(x = b, y = a, fill = b)) +
## geom_boxplot() +
## labs(y= "ppm", x = "contaminant")+
## geom_point(shape = 21, position = position_jitterdodge(jitter.width = 0)) + scale_y_continuous(label=scientific_10)
## A |br00t |2023-06-02 13:33:25Z |1420 | 10| 8| 0|
## | 76390329|you can shift the position with position_nudge:
## ## using your example objects:
## ggppp +
## geom_density(mapping = aes(y = y , col = id),
## position = position_nudge(x = 12),
## inherit.aes = FALSE
## ) +
## geom_density(mapping = aes(x = x, col = id),
## position = position_nudge(y = 20),
## inherit.aes = FALSE
## ) |I_O |2023-06-02 14:04:11Z |2755 | 14| 2| 0|
## | 76390299|One option would be to pass both the value and and the index column to your function:
## data <- data.frame(
## x = c(4, 5, 6, 2, 3, 5, 8, 1),
## name = c("A", "B", "A", "B", "A", "A", "B", "B")
## )
##
## myfun <- function(x) {
## sprintf("The mean of %s is %f", unique(x[[2]]), mean(x[[1]]))
## }
##
## tapply(data[c("x", "name")], data$name, myfun)
## #> A B
## #> "The mean of A is 4.500000" "The mean of B is 4.000000" |stefan |2023-06-02 12:57:20Z |84193 | 50| 25| 6|
## | 76389710|Here's an example. As you suggested it might be easiest to rasterize the vector (shapefile data) first if you're satisfied with the 100 m resolution.
## create some sample data:
## library(terra)
## library(sf) ## only needed here to create sample landuse polygon
##
##
## ## create a sample vector
## ## or simply: terra::vect('path/to/your/shapefile.shp')
## v_landuse <-
## st_point(c(0, 0)) |>
## st_buffer(5) |>
## st_sfc() |>
## vect() ## convert to terra-compatible class "SpatVector"
##
##
## ## create sample rasters of random values:
## set.seed(42)
## raster_height <- raster_width <- 6
##
## ## rasterize v_landuse and recode with 'crop' and 'grass'
## r_landuse <- rasterize(v_landuse, rast(matrix(1, raster_height, raster_width))) |>
## setValues(ifelse(values(r_landuse), 'crop', 'grass'))
##
## r_texture <- c('low', 'middle', 'high')[1 + rbinom(raster_length ^ 2, 2, 1/3)] |>
## matrix(raster_height) |> rast()
##
## r_carbon <- runif(raster_height^2, 1, 4) |>
## matrix(raster_height) |> rast()
##
##
## run your raster algebra, e. g. identify cropland cells with middle texture and at least 2 units of carbon:
## r_combined <- all(r_landuse == 'crop',
## r_texture == 'middle',
## r_carbon >= 2
## )
##
## inspect the result:
## par(mfrow = c(2, 2))
## ls(pattern = '^r_.*') |>
## Map(f = \(n) {
## plot(get(n), main = n)
## if(n == 'r_landuse') plot(v_landuse, add = TRUE)
## }) |I_O |2023-06-02 13:23:01Z |2755 | 14| 2| 0|
## | 76389697|You need to split on \n\n (not \n) then replace the middle \n values:
## magick::image_read(image_greek) %>%
## ocr() %>%
## str_split("\n\n") %>%
## unlist() %>%
## str_replace_all("\n", " ")
##
## Output:
## [1] "© Much learning does not teach understanding."
## [2] "© The road up and the road down is one and the same."
## [3] "© Our envy always lasts longer than the happiness of those we envy."
## [4] "© No man ever steps in the same river twice, for it's not the same river and he's not the same man. " |jpsmith |2023-06-02 12:05:08Z |9359 | 33| 14| 5|
## | 76389697|one approach:
## heraclitus_sentences <- list(c('this is', 'the first sentence',
## '', 'and', 'this', 'the second'))
##
## separator <- '___'
##
## gsub('^$', separator, heraclitus_sentences[[1]]) |>
## paste(collapse = ' ') |>
## strsplit(separator)
##
## [[1]]
## [1] "this is the first sentence " " and this the second"
##
## Please remember to present code/sample data in a format others can readily work with (no screenshots). |I_O |2023-06-02 12:00:14Z |2755 | 14| 2| 0|
## | 76389613|You can simply extract the rows satisfying your condition:
## matrix2 <- matrix1[substr(matrix1[, 1], 6, 7) == '02', ] |user22007196 |2023-06-02 11:26:00Z |26 | 0| 0| 0|
## | 76389515|You need to add a third digit:
## df$cause <- ifelse(df$code >= "C000" & df$code <= "D489", "cause 1",
## ifelse(df$code >= "D500" & df$code <= "D899", "cause 2", NA))
##
##
## > df
## id code cause
## 1 1 C410 cause 1
## 2 2 D486 cause 1
## 3 3 D485 cause 1
## 4 4 D501 cause 2
## 5 5 D600 cause 2
## 6 6 D899 cause 2 |Gerald T |2023-06-02 11:01:14Z |649 | 17| 3| 0|
## | 76389510|We can do this with dplyr and case_when ()
## library(dplyr)
##
## #example data
## data <- structure(list(S.NO = c("P1", "P2", "P3", "P4", "P5", "P6"), Numbers = c(2, 5, 2, 2, 3, 4)), class = "data.frame", row.names = c(NA, -6L))
##
## create new column with filters
## data <- data %>%
## mutate(Opt-Numbers = case_when(
## Numbers >= 4 ~ "Opt-CMM",
## Numbers < 4 ~ "Opt-MM"
## ))
##
## data |pbraeutigm |2023-06-02 11:13:50Z |309 | 8| 3| 0|
## | 76389468|Does this work?
## library(tidyverse)
## mydata %>%
## # Filter on months 6 and 7
## filter(str_sub(month_id, -2) %in% c("07", "06")) %>%
## # Sort by Key + yearmonth
## arrange(MDM_Key, month_id) %>%
## # Group by Key
## group_by(MDM_Key) %>%
## # Calculate difference between sales
## mutate(sale_diff = sale_count - lag(sale_count)) %>%
## # Return unique values for each Key + year
## filter(str_sub(month_id, -2) == "07") %>%
## mutate(year = as.integer(str_sub(month_id, 1, 4))) %>%
## select(year, MDM_Key, sale_count = sale_diff)
## # A tibble: 12 x 3
## # Groups: MDM_Key [2]
## year MDM_Key sale_count
## <int> <int> <int>
## 1 2012 1 -829881
## 2 2013 1 -97422
## 3 2014 1 -4785612
## 4 2015 1 -2192138
## 5 2016 1 756600
## 6 2017 1 1968317
## 7 2012 2 -1659762
## 8 2013 2 -194844
## 9 2014 2 -9571224
## 10 2015 2 -4384276
## 11 2016 2 1513200
## 12 2017 2 3936634 |NicChr |2023-06-02 11:28:25Z |490 | 8| 0| 0|
## | 76389386|You could use unique():
## library(dplyr, warn = FALSE)
##
## df |>
## group_by(variant, ID) |>
## summarise(
## across(everything(), ~ toString(na.omit(unique(.x))))
## )
## #> `summarise()` has grouped output by 'variant'. You can override using the
## #> `.groups` argument.
## #> # A tibble: 4 × 4
## #> # Groups: variant [4]
## #> variant ID value1 value2
## #> <chr> <chr> <chr> <chr>
## #> 1 1-12345-GT A GJB1 0.5, 1
## #> 2 2-3456-C-T B TBC 0.5
## #> 3 3-45567-C-A C TZY 0.5
## #> 4 4-566879-C-T D FBY 0.5 |stefan |2023-06-02 10:48:11Z |84193 | 50| 25| 6|
## | 76389243|You can use littler and its r front-end. It provides a vector argv (just like C does) which is also easier to use programmatically than commandArgs(). (We most often use this with additional command-line parsing packages; I liked docopt a lot and have many examples.)
## Code
## #!/usr/bin/r
##
## for (i in seq_along(argv)) {
## cat("Argument", i, "is", argv[i], "\n")
## }
##
## Demo
## $ ./argcount.r a b c d e f g h i j k l
## Argument 1 is a
## Argument 2 is b
## Argument 3 is c
## Argument 4 is d
## Argument 5 is e
## Argument 6 is f
## Argument 7 is g
## Argument 8 is h
## Argument 9 is i
## Argument 10 is j
## Argument 11 is k
## Argument 12 is l
## $
##
## (This worked all the way up to 26 when I just tested with all letters.)
## Littler ships with a Makevars for Linux and macOS and could build on Windows but I have not a need. If someone wants to "port" this, the very similar package RInside is structured similarly and can provide a model. |Dirk Eddelbuettel |2023-06-02 13:00:24Z |358804 | 722| 638| 56|
## | 76389243|BASH has a builtin named shift which might help you.
## #!/usr/bin/bash
##
## echo ${1} ${9}
## shift 9
## echo ${1}
##
## If you save the above script as shift_example.bash and make it executable then you can call it as below to see what I mean.
## ./shift_example.bash A B C D E F G H I J K L
##
## The second echo should print J which is the tenth argument.
## See https://www.gnu.org/software/bash/manual/bash.html for details. |chandra |2023-06-02 12:51:32Z |63 | 6| 0| 0|
## | 76389050|Two basic approaches: (1) do a yearly sequence, conditioning on whether the start is before and if the end is after June (06), used in the base R approach; (2) do a monthly sequence, conditioning (group-filtering) each year on whether "06" is in one of the months. They both work, which you choose depends on comfort/preference.
## base R
## Since this example uses "sequence by year", I'll use the fact that POSIXlt (notice the l, as opposed to the more common POSIXct) is really a list with elements for year, month, etc:
## dput(as.POSIXlt(Sys.time()))
## # structure(list(sec = 21.7977600097656, min = 30L, hour = 8L,
## # mday = 2L, mon = 5L, year = 123L, wday = 5L, yday = 152L,
## # isdst = 1L, zone = "EDT", gmtoff = -14400L), class = c("POSIXlt",
## # "POSIXt"), tzone = c("", "EST", "EDT"))
##
## With that, we can easily $-index the list and compare the month as a value. NOTE: ?POSIXlt shows that mon is ‘mon’ 0-11: months after the first of the year, meaning that June is 5 in 0-based months. (Ergo the use of 5 below.)
## fun <- function(sd, ed) {
## sdlt <- as.POSIXlt(sd)
## edlt <- as.POSIXlt(ed)
## if (sdlt$mon > 5) sdlt$year <- sdlt$year+1
## if (edlt$mon < 5) edlt$year <- edlt$year-1
## if (sdlt <= edlt) unique(format(seq.Date(as.Date(sdlt), as.Date(edlt), by = "year"), format="%Y"))
## }
## years <- Map(fun, mydata$startdate, mydata$enddate)
## str(years)
## # List of 5
## # $ : chr [1:5] "2008" "2009" "2010" "2011" ...
## # $ : chr [1:4] "2008" "2009" "2010" "2011"
## # $ : chr "2012"
## # $ : chr "2012"
## # $ : NULL
## out <- data.frame(
## id = rep(mydata$id, lengths(years)),
## statenumber = rep(mydata$statenumber, lengths(years)),
## year = unlist(years))
## out
## # id statenumber year
## # 1 R007 1 2008
## # 2 R007 1 2009
## # 3 R007 1 2010
## # 4 R007 1 2011
## # 5 R007 1 2012
## # 6 R008 2 2008
## # 7 R008 2 2009
## # 8 R008 2 2010
## # 9 R008 2 2011
## # 10 R008 3 2012
## # 11 R009 4 2012
##
## If there's a chance to have duplicates (repeated states for an id that gap within the same year), then you can use unique(out).
## Map just "zips" the data together. The individual calls to fun unrolled would look like this:
## list(
## fun(mydata$startdate[1], mydata$enddate[1]),
## fun(mydata$startdate[2], mydata$enddate[2]),
## fun(mydata$startdate[3], mydata$enddate[3]),
## fun(mydata$startdate[4], mydata$enddate[4]),
## fun(mydata$startdate[5], mydata$enddate[5])
## )
##
## The inclusion of ed on the end of c(seq.Date(..), ed) is to guard against the fact that seq may not include the year of the enddate. In this case, it ensures that R008 in state 2 sees 2012.
## dplyr
## In this (and the data.table) section, we'll use the monthly sequence instead, using format="%m" as the month. Dissimilar from POSIXlt above (June is 5), reading ?strptime for the %-codes defines %m as ‘%m’ Month as decimal number (01-12), so June is back to "06".
## library(dplyr)
## mydata %>%
## rowwise() %>%
## summarize(
## id, statenumber,
## dates = seq(startdate, enddate, by = "month"),
## year = format(dates, format = "%Y")) %>%
## group_by(id, statenumber, year) %>%
## filter(any(format(dates, format = "%m") == "06")) %>%
## distinct(id, statenumber, year) %>%
## ungroup()
## # # A tibble: 11 × 3
## # id statenumber year
## # <chr> <dbl> <chr>
## # 1 R007 1 2008
## # 2 R007 1 2009
## # 3 R007 1 2010
## # 4 R007 1 2011
## # 5 R007 1 2012
## # 6 R008 2 2008
## # 7 R008 2 2009
## # 8 R008 2 2010
## # 9 R008 2 2011
## # 10 R008 3 2012
## # 11 R009 4 2012
##
## I generally try to avoid rowwise when able, but this problem does need to be executed one row at a time (which is effectively what Map is doing in the base R solution above).
## data.table
## library(data.table)
## as.data.table(mydata)[, .(id, statenumber, dates = seq(startdate, enddate, by = "month")), by = .(seq(nrow(mydata)))
## ][, year := format(dates, format="%Y")
## ][, .SD[any(format(dates, format="%m") == "06"),], by = .(id, statenumber, year)
## ][, c("seq", "dates") := NULL
## ][, unique(.SD)]
## # id statenumber year
## # <char> <num> <char>
## # 1: R007 1 2008
## # 2: R007 1 2009
## # 3: R007 1 2010
## # 4: R007 1 2011
## # 5: R007 1 2012
## # 6: R008 2 2008
## # 7: R008 2 2009
## # 8: R008 2 2010
## # 9: R008 2 2011
## # 10: R008 3 2012
## # 11: R009 4 2012 |r2evans |2023-06-02 11:56:09Z |134740 | 142| 75| 6|
## | 76389050|Perhaps not the prettiest solution but would this work?
## library(tidyverse)
## library(tidyr)
## mydata %>%
## mutate(yearmonth = startdate) %>%
## group_by(id, statenumber) %>%
## complete(yearmonth = seq(startdate, enddate, by = "month")) %>%
## arrange(id, yearmonth) %>%
## mutate(year = year(yearmonth),
## month = month(yearmonth)) %>%
## group_by(id) %>%
## fill(statenumber, .direction = "down") %>%
## mutate(year_residence = if_else(month == 6L, year, NA_integer_)) %>%
## group_by(id, year, statenumber) %>%
## fill(year_residence, .direction = "updown") %>%
## ungroup() %>%
## distinct(id, statenumber, year_residence) %>%
## filter(!is.na(year_residence)) |NicChr |2023-06-02 12:13:26Z |490 | 8| 0| 0|
## | 76389028|something like this?
## library (ggplot2)
## library(dplyr)
##
## df <- data.frame(
## x = gl(4, 1, labels = c('first', 'A', 'second', 'B')),
## y = rnorm(4),
## show_marker = c(FALSE, TRUE)
## )
##
## df |>
## ggplot(aes(x,y)) +
## geom_point(data = df |> filter(show_marker)) +
## scale_x_discrete(drop = FALSE, ) +
## coord_flip() +
## theme(axis.text.y = element_text(angle = c(90, 0),
## hjust = c(0)
## )
## ) |I_O |2023-06-02 16:30:36Z |2755 | 14| 2| 0|
## | 76389028|We could do it with facets:
## # data
## df =
## data.frame(
## x = factor(c("A", "B", "C", "B", "A", "C")),
## label = factor(c("header 1", "header 2", "header 3", "header 2","header 1", "header 3")),
## y = c(10, 15, 8, 12, 9, 10)
## )
##
## ggplot(df, aes(x,y)) +
## geom_point() +
## coord_flip() +
## facet_wrap(~ label,ncol = 1,strip.position = "left", scales = "free_y") +
## theme(panel.spacing = unit(0,'cm'),
## strip.placement = "outside",
## strip.background = element_blank(),
## strip.text = element_text(hjust = 0))
##
##
##
## Created on 2023-06-02 with reprex v2.0.2 |M Aurélio |2023-06-02 18:44:46Z |762 | 12| 4| 0|
## | 76388858|You can add text whereever you want. For example
## library(terra)
## p <- terra::vect(system.file("ex/lux.shp", package="terra"))
## r <- terra::rast(system.file("ex/elev.tif", package="terra"))
##
## plot(r, mar=c(3,3,3,6))
## text(x=6.77, y=49.95, "Score", srt=-90, cex=2, xpd=NA, pos=4)
## lines(p) |Robert Hijmans |2023-06-02 12:21:50Z |39179 | 60| 52| 4|
## | 76388339|For some reason, your count column is a list.
## library(data.table)
## df <- structure(list(summary_type = c("0", "1", "2"), count = structure(list(
## 62234441L, 5119L, 821770L))), row.names = c(NA, -3L), class = c("data.table", "data.frame"))
##
## df[count]
## #Error in `[.data.table`(df, count, ) :
## # count is not found in calling scope but it is a column of type list.
##
## Use unlist:
## df[order(unlist(count))]
##
## # summary_type count
## # 1: 1 5119
## # 2: 2 821770
## # 3: 0 62234441 |Maël |2023-06-02 08:21:58Z |42274 | 66| 27| 3|
## | 76388303|You could pass a named vector or list to the choices argument.
## From the docs (?selectInput):
##
## If elements of the list are named, then that name — rather than the value — is displayed to the user.
##
## library(tidyverse)
## library(shiny)
##
## test_id <- c(1, 2, 3)
## test_title <- c("a", "b", "c")
##
## test_df <- data.frame(test_id, test_title)
## choices <- setNames(test_df$test_id, test_df$test_title)
##
## ui <- fluidPage(
## selectInput("test", "test", choices = choices),
## textOutput("selectedItem")
## )
##
## server <- function(input, output, session) {
## observeEvent(input$test, {
## output$selectedItem <- renderText(input$test)
## })
## }
##
## shinyApp(ui, server)
## #>
## #> Listening on http://127.0.0.1:5936 |stefan |2023-06-02 08:17:05Z |84193 | 50| 25| 6|
## | 76388032|As @adroMine said, the best option is pivot_wider from tidyr package.I suggest that you install tidyverse. One solution to your problem is the next one:
## library(tidyverse)
##
## # Assuming your original dataframe is named "dataframe1"
## # Let's say it looks like this:
## # dataframe1 <- data.frame(gene_name = c("gene1", "gene2", "gene3"),
## # mutation = c("mutation1", "mutation2", "mutation3"),
## # sample = c("sample1", "sample2", "sample3"))
##
## # Pivot dataframe1 to convert it to dataframe2
## dataframe2 <- dataframe1 %>%
## pivot_wider(names_from = sample, values_from = mutation, values_fill = 0) %>%
## mutate(across(-gene_name, ~ifelse(. != 0, 1, 0)))
##
## # If you want to rename the columns to match the desired structure
## colnames(dataframe2)[-1] <- paste0("sample ", colnames(dataframe2)[-1])
##
## With this you should have your solution. If you can provide some data examples It will be easier to help you. |Jorge A |2023-06-02 08:15:16Z |21 | 6| 23| 3|
## | 76388032|with(df, table(name, sample))
## sample
## name sample1 sample2 sample3
## gene1 1 0 0
## gene2 0 1 0
## gene4 0 1 1
##
## xtabs(~name+sample, df)
## sample
## name sample1 sample2 sample3
## gene1 1 0 0
## gene2 0 1 0
## gene4 0 1 1
##
## tidyr::pivot_wider(cbind(df, value=1),
## id_cols = name, names_from = sample, values_fill = 0)
##
## # A tibble: 3 × 4
## name sample1 sample2 sample3
## <chr> <dbl> <dbl> <dbl>
## 1 gene1 1 0 0
## 2 gene2 0 1 0
## 3 gene4 0 1 1
##
## mosaic::counts(~sample|name, df)[-1]
##
## name n_sample1 n_sample2 n_sample3
## 1 gene1 1 NA NA
## 2 gene2 NA 1 NA
## 3 gene4 NA 1 1 |Onyambu |2023-06-02 19:35:31Z |64533 | 53| 0| 0|
## | 76387472|I suspect you already have a column named name or value before you run pivot_longer, which by default tries to create columns with those names. As noted here, the error message isn't necessarily clear that's the problem.
## Try grep("name", colnames(as)) and grep("value", colnames(as)) to find those columns.
## Either rename in your data frame or use pivot_longer( ... names_to = "a_new_name_col", values_to = "a_new_value_col")
## data.frame(a = 1:2, name = 3:4, value = 7:8) %>%
## tidyr::pivot_longer(a)
## #Error in `vec_cbind()`:
## #! Names must be unique.
## #✖ These names are duplicated:
## # * "name" at locations 1 and 3.
## # * "value" at locations 2 and 4.
## #ℹ Use argument `names_repair` to specify repair strategy.
## #Run `rlang::last_trace()` to see where the error occurred.
##
## data.frame(a = 1:2, name2 = 3:4, value2 = 7:8) %>%
## tidyr::pivot_longer(a)
## ## A tibble: 2 × 4
## # name2 value2 name value
## # <int> <int> <chr> <int>
## #1 3 7 a 1
## #2 4 8 a 2 |Jon Spring |2023-06-02 07:03:57Z |52995 | 52| 35| 4|
## | 76387441|The margins package only computes slopes, not predictions. Also, that package is not being actively maintained and developed anymore (perhaps only critical bug fixes).
## You can use the marginaleffects package instead, which is a newer package I developed as a more flexible successor to margins (note: conflict of interest).
## See this vignette for a syntax comparison with Stata: https://vincentarelbundock.github.io/marginaleffects/articles/alternative_software.html
## The code you need will probably look similar to this:
## library(nnet)
## library(marginaleffects)
##
## m <- multinom(Species ~ Petal.Length + Petal.Width, data = iris, trace = FALSE)
##
## avg_predictions(m, newdata = datagrid(Petal.Length = c(1, 4, 6)))
## #
## # Group Estimate Std. Error z Pr(>|z|) S 2.5 % 97.5 % Petal.Width Petal.Length
## # setosa 0.333 0.0479 6.95 < 0.001 38.0 0.2389 0.427 1.2 1
## # versicolor 0.382 0.1195 3.20 0.00139 9.5 0.1477 0.616 1.2 4
## # virginica 0.285 0.1192 2.39 0.01672 5.9 0.0516 0.519 1.2 6
## #
## # Columns: rowid, group, estimate, std.error, statistic, p.value, s.value, conf.low, conf.high, Species, Petal.Width, Petal.Length |Vincent |2023-06-02 12:09:11Z |15442 | 38| 36| 7|
## | 76387189|The "S.h" headers file is from the "S" language (the precursor to R); replacing "S.h" with "R.h" fixes the 'cant find S.h' error, but causes other issues. Clearly this package is not being maintained :(
## I've forked the repository and made a couple of changes to the source code (commits fe15cf9 and ab9fe5c). I successfully installed both the scalop and infercna packages via Bioconductor, but there are a lot of warnings when they compile. I used gcc to compile them, rather than Apple Clang, with these flags:
## cat ~/.R/Makevars
## LOC=/usr/local/gfortran
## CC=$(LOC)/bin/gcc -fopenmp
## CXX=$(LOC)/bin/g++ -fopenmp
## CXX11=$(LOC)/bin/g++ -fopenmp
##
## CFLAGS=-g -O3 -Wall -pedantic -std=gnu99 -mtune=native -pipe
## CXXFLAGS=-g -O3 -Wall -pedantic -std=c++11 -mtune=native -pipe
## LDFLAGS=-L$(LOC)/lib -Wl,-rpath,$(LOC)/lib,-L/usr/local/lib
## CPPFLAGS=-I$(LOC)/include -I/Library/Developer/CommandLineTools/SDKs/MacOSX.sdk/usr/include -I/usr/local/include
##
## FLIBS=-L/usr/local/gfortran/lib/gcc/x86_64-apple-darwin19/10.2.0 -L/usr/local/gfortran/lib -lgfortran -lquadmath -lm
## CXX1X=/usr/local/gfortran/bin/g++
## CXX98=/usr/local/gfortran/bin/g++
## CXX11=/usr/local/gfortran/bin/g++
## CXX14=/usr/local/gfortran/bin/g++
## CXX17=/usr/local/gfortran/bin/g++
##
## If you have problems installing the scalop package from source using Apple Clang, and you have an intel processor, my instructions for compiling R packages from source are here: https://stackoverflow.com/a/65334247/12957340
## If you have an Apple silicon processor, you can try the instructions here: https://stackoverflow.com/a/68275558/12957340
## This is how I installed the packages:
## install.packages("BiocManager")
## library(BiocManager)
## BiocManager::install("Homo.sapiens")
## BiocManager::install("jpmam1/scalop") # my forked copy
## BiocManager::install("jlaffy/infercna")
##
## The example from the vignette runs, but some of the functions no longer work as expected:
## library(infercna)
## #>
## #>
## #> Warning: replacing previous import 'AnnotationDbi::select' by 'dplyr::select'
## #> when loading 'scalop'
## #>
## #> Attaching package: 'infercna'
## #> The following object is masked from 'package:graphics':
## #>
## #> clip
## set.seed(1014)
## useGenome('hg19')
## #> Genome has been set to hg19
## retrieveGenome()
## #> Retrieving: hg19
## #> # A tibble: 33,575 × 8
## #> symbol start_position end_position chromosome_name arm band strand
## #> <chr> <dbl> <dbl> <fct> <fct> <chr> <int>
## #> 1 DDX11L1 11869 14412 1 1p p36.33 1
## #> 2 WASH7P 14363 29806 1 1p p36.33 -1
## #> 3 MIR1302-11 29554 31109 1 1p p36.33 1
## #> 4 FAM138A 34554 36081 1 1p p36.33 -1
## #> 5 OR4G4P 52473 54936 1 1p p36.33 1
## #> 6 OR4G11P 62948 63887 1 1p p36.33 1
## #> 7 OR4F5 69091 70008 1 1p p36.33 1
## #> 8 CICP27 131025 134836 1 1p p36.33 1
## #> 9 RNU6-1100P 157784 157887 1 1p p36.33 -1
## #> 10 CICP7 329431 332236 1 1p p36.33 -1
## #> # ℹ 33,565 more rows
## #> # ℹ 1 more variable: ensembl_gene_id <chr>
## m = useData(mgh125)
## dim(m)
## #> [1] 8556 1266
## range(m)
## #> [1] 0.000 15.328
## lengths(refCells)
## #> oligodendrocytes macrophages
## #> 219 707
##
## cna = infercna(m = m, refCells = refCells, n = 5000, noise = 0.1, isLog = TRUE, verbose = FALSE)
## cnaM = cna[, !colnames(cna) %in% unlist(refCells)]
##
## cnaScatterPlot(cna = cna,
## signal.threshold = NULL,
## main = 'Default')
##
##
##
## obj = cnaPlot(cna = cna,
## order.cells = TRUE,
## subtitle = 'Copy-Number Aberrations in a patient with Glioblastoma')
## #> Error in if (class(x) == "matrix") {: the condition has length > 1
##
## Depending on your use-case, you'll probably need to make further changes to the source code to get your desired output. If you have further errors/questions please post them in the comments and I'll take a look at them when I have some time.
## sessionInfo()
## #> R version 4.3.0 (2023-04-21)
## #> Platform: x86_64-apple-darwin20 (64-bit)
## #> Running under: macOS Ventura 13.3.1
## #>
## #> Matrix products: default
## #> BLAS: /Library/Frameworks/R.framework/Versions/4.3-x86_64/Resources/lib/libRblas.0.dylib
## #> LAPACK: /Library/Frameworks/R.framework/Versions/4.3-x86_64/Resources/lib/libRlapack.dylib; LAPACK version 3.11.0
## #>
## #> locale:
## #> [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## #>
## #> time zone: Australia/Melbourne
## #> tzcode source: internal
## #>
## #> attached base packages:
## #> [1] stats graphics grDevices utils datasets methods base
## #>
## #> other attached packages:
## #> [1] infercna_1.0.0
## #>
## #> loaded via a namespace (and not attached):
## #> [1] splines_4.3.0
## #> [2] BiocIO_1.10.0
## #> [3] bitops_1.0-7
## #> [4] ggplotify_0.1.0
## #> [5] filelock_1.0.2
## #> [6] tibble_3.2.1
## #> [7] R.oo_1.25.0
## #> [8] polyclip_1.10-4
## #> [9] graph_1.78.0
## #> [10] reprex_2.0.2
## #> [11] XML_3.99-0.14
## #> [12] lifecycle_1.0.3
## #> [13] rstatix_0.7.2
## #> [14] edgeR_3.42.4
## #> [15] Homo.sapiens_1.3.1
## #> [16] lattice_0.21-8
## #> [17] MASS_7.3-60
## #> [18] OrganismDbi_1.42.0
## #> [19] backports_1.4.1
## #> [20] magrittr_2.0.3
## #> [21] limma_3.56.1
## #> [22] plotly_4.10.1
## #> [23] rmarkdown_2.22
## #> [24] yaml_2.3.7
## #> [25] metapod_1.8.0
## #> [26] cowplot_1.1.1
## #> [27] DBI_1.1.3
## #> [28] RColorBrewer_1.1-3
## #> [29] abind_1.4-5
## #> [30] zlibbioc_1.46.0
## #> [31] Rtsne_0.16
## #> [32] R.cache_0.16.0
## #> [33] GenomicRanges_1.52.0
## #> [34] purrr_1.0.1
## #> [35] mixtools_2.0.0
## #> [36] R.utils_2.12.2
## #> [37] msigdbr_7.5.1
## #> [38] ggraph_2.1.0
## #> [39] BiocGenerics_0.46.0
## #> [40] RCurl_1.98-1.12
## #> [41] styler_1.10.0
## #> [42] yulab.utils_0.0.6
## #> [43] tweenr_2.0.2
## #> [44] rappdirs_0.3.3
## #> [45] GenomeInfoDbData_1.2.10
## #> [46] IRanges_2.34.0
## #> [47] S4Vectors_0.38.1
## #> [48] enrichplot_1.20.0
## #> [49] ggrepel_0.9.3
## #> [50] irlba_2.3.5.1
## #> [51] tidytree_0.4.2
## #> [52] dqrng_0.3.0
## #> [53] DelayedMatrixStats_1.22.0
## #> [54] codetools_0.2-19
## #> [55] DelayedArray_0.26.3
## #> [56] scuttle_1.10.1
## #> [57] DOSE_3.26.1
## #> [58] xml2_1.3.4
## #> [59] ggforce_0.4.1
## #> [60] tidyselect_1.2.0
## #> [61] aplot_0.1.10
## #> [62] farver_2.1.1
## #> [63] ScaledMatrix_1.8.1
## #> [64] viridis_0.6.3
## #> [65] matrixStats_0.63.0
## #> [66] stats4_4.3.0
## #> [67] BiocFileCache_2.8.0
## #> [68] GenomicAlignments_1.36.0
## #> [69] jsonlite_1.8.4
## #> [70] BiocNeighbors_1.18.0
## #> [71] tidygraph_1.2.3
## #> [72] survival_3.5-5
## #> [73] segmented_1.6-4
## #> [74] tools_4.3.0
## #> [75] progress_1.2.2
## #> [76] treeio_1.24.1
## #> [77] TxDb.Hsapiens.UCSC.hg19.knownGene_3.2.2
## #> [78] Rcpp_1.0.10
## #> [79] glue_1.6.2
## #> [80] gridExtra_2.3
## #> [81] xfun_0.39
## #> [82] qvalue_2.32.0
## #> [83] MatrixGenerics_1.12.0
## #> [84] GenomeInfoDb_1.36.0
## #> [85] dplyr_1.1.2
## #> [86] withr_2.5.0
## #> [87] BiocManager_1.30.20
## #> [88] fastmap_1.1.1
## #> [89] bluster_1.10.0
## #> [90] fansi_1.0.4
## #> [91] rsvd_1.0.5
## #> [92] caTools_1.18.2
## #> [93] digest_0.6.31
## #> [94] R6_2.5.1
## #> [95] gridGraphics_0.5-1
## #> [96] colorspace_2.1-0
## #> [97] GO.db_3.17.0
## #> [98] biomaRt_2.56.0
## #> [99] RSQLite_2.3.1
## #> [100] R.methodsS3_1.8.2
## #> [101] utf8_1.2.3
## #> [102] tidyr_1.3.0
## #> [103] generics_0.1.3
## #> [104] data.table_1.14.8
## #> [105] rtracklayer_1.60.0
## #> [106] prettyunits_1.1.1
## #> [107] graphlayouts_1.0.0
## #> [108] httr_1.4.6
## #> [109] htmlwidgets_1.6.2
## #> [110] S4Arrays_1.0.4
## #> [111] scatterpie_0.2.0
## #> [112] pkgconfig_2.0.3
## #> [113] gtable_0.3.3
## #> [114] blob_1.2.4
## #> [115] SingleCellExperiment_1.22.0
## #> [116] XVector_0.40.0
## #> [117] shadowtext_0.1.2
## #> [118] clusterProfiler_4.8.1
## #> [119] htmltools_0.5.5
## #> [120] carData_3.0-5
## #> [121] fgsea_1.26.0
## #> [122] scalop_1.1.0
## #> [123] RBGL_1.76.0
## #> [124] scales_1.2.1
## #> [125] Biobase_2.60.0
## #> [126] png_0.1-8
## #> [127] scran_1.28.1
## #> [128] ggfun_0.0.9
## #> [129] knitr_1.43
## #> [130] rstudioapi_0.14
## #> [131] reshape2_1.4.4
## #> [132] rjson_0.2.21
## #> [133] nlme_3.1-162
## #> [134] curl_5.0.0
## #> [135] org.Hs.eg.db_3.17.0
## #> [136] cachem_1.0.8
## #> [137] stringr_1.5.0
## #> [138] parallel_4.3.0
## #> [139] HDO.db_0.99.1
## #> [140] AnnotationDbi_1.62.1
## #> [141] restfulr_0.0.15
## #> [142] pillar_1.9.0
## #> [143] grid_4.3.0
## #> [144] vctrs_0.6.2
## #> [145] ggpubr_0.6.0
## #> [146] BiocSingular_1.16.0
## #> [147] car_3.1-2
## #> [148] beachmat_2.16.0
## #> [149] dbplyr_2.3.2
## #> [150] cluster_2.1.4
## #> [151] evaluate_0.21
## #> [152] zeallot_0.1.0
## #> [153] GenomicFeatures_1.52.0
## #> [154] locfit_1.5-9.7
## #> [155] cli_3.6.1
## #> [156] compiler_4.3.0
## #> [157] Rsamtools_2.16.0
## #> [158] rlang_1.1.1
## #> [159] crayon_1.5.2
## #> [160] ggsignif_0.6.4
## #> [161] plyr_1.8.8
## #> [162] fs_1.6.2
## #> [163] stringi_1.7.12
## #> [164] viridisLite_0.4.2
## #> [165] BiocParallel_1.34.2
## #> [166] babelgene_22.9
## #> [167] munsell_0.5.0
## #> [168] Biostrings_2.68.1
## #> [169] lazyeval_0.2.2
## #> [170] GOSemSim_2.26.0
## #> [171] Matrix_1.5-4.1
## #> [172] patchwork_1.1.2
## #> [173] hms_1.1.3
## #> [174] sparseMatrixStats_1.12.0
## #> [175] bit64_4.0.5
## #> [176] ggplot2_3.4.2
## #> [177] statmod_1.5.0
## #> [178] KEGGREST_1.40.0
## #> [179] SummarizedExperiment_1.30.1
## #> [180] kernlab_0.9-32
## #> [181] igraph_1.4.3
## #> [182] broom_1.0.4
## #> [183] memoise_2.0.1
## #> [184] ggtree_3.8.0
## #> [185] fastmatch_1.1-3
## #> [186] bit_4.0.5
## #> [187] downloader_0.4
## #> [188] gson_0.1.0
## #> [189] ape_5.7-1
##
## Created on 2023-06-02 with reprex v2.0.2 |jared_mamrot |2023-06-02 06:22:00Z |21404 | 43| 20| 4|
## | 76387189|All - because in jared's answer he notes installing GCC can take hours, I wanted to see if the changes he made, in conjunction with use of BiocManager for installation, would solve the problem.
## In fact, for me it does.
## Specifically, the procedure is:
## Install all other dependencies first, e.g. by running
## devtools::install_github("jlaffy/infercna")
##
## after the dependencies install, the package will fail as indicated in the original post. Following this, run:
## install.packages("BiocManager") # if not installed
## library(BiocManager)
## BiocManager::install("Homo.sapiens")
## BiocManager::install("jpmam1/scalop") # Jared's awesome forked copy
## BiocManager::install("jlaffy/infercna")
##
## For me, on R v4.3, MacOSX 11.7, intel chip, this returns:
## BiocManager::install("Homo.sapiens")
## 'getOption("repos")' replaces Bioconductor standard repositories, see 'help("repositories", package = "BiocManager")' for details.
## Replacement repositories:
## CRAN: https://cran.rstudio.com/
## Bioconductor version 3.17 (BiocManager 1.30.20), R 4.3.0 (2023-04-21)
## Installing package(s) 'Homo.sapiens'
## also installing the dependencies ‘graph’, ‘RBGL’, ‘OrganismDbi’
##
## trying URL 'https://bioconductor.org/packages/3.17/bioc/bin/macosx/big-sur-x86_64/contrib/4.3/graph_1.78.0.tgz'
## Content type 'application/x-gzip' length 2086473 bytes (2.0 MB)
## ==================================================
## downloaded 2.0 MB
##
## trying URL 'https://bioconductor.org/packages/3.17/bioc/bin/macosx/big-sur-x86_64/contrib/4.3/RBGL_1.76.0.tgz'
## Content type 'application/x-gzip' length 3547879 bytes (3.4 MB)
## ==================================================
## downloaded 3.4 MB
##
## trying URL 'https://bioconductor.org/packages/3.17/bioc/bin/macosx/big-sur-x86_64/contrib/4.3/OrganismDbi_1.42.0.tgz'
## Content type 'application/x-gzip' length 796254 bytes (777 KB)
## ==================================================
## downloaded 777 KB
##
##
## The downloaded binary packages are in
## /var/folders/hj/1wvjfb692c3gswybcg8xdcwm0000gn/T//Rtmp6rXv9T/downloaded_packages
## installing the source package ‘Homo.sapiens’
##
## trying URL 'https://bioconductor.org/packages/3.17/data/annotation/src/contrib/Homo.sapiens_1.3.1.tar.gz'
## Content type 'application/x-gzip' length 1617 bytes
## ==================================================
## downloaded 1617 bytes
##
## * installing *source* package ‘Homo.sapiens’ ...
## ** using staged installation
## ** R
## ** data
## ** byte-compile and prepare package for lazy loading
## ** help
## *** installing help indices
## ** building package indices
## ** testing if installed package can be loaded from temporary location
## ** testing if installed package can be loaded from final location
## ** testing if installed package keeps a record of temporary installation path
## * DONE (Homo.sapiens)
##
## The downloaded source packages are in
## ‘/private/var/folders/hj/1wvjfb692c3gswybcg8xdcwm0000gn/T/Rtmp6rXv9T/downloaded_packages’
## Old packages: 'matrixStats'
## Update all/some/none? [a/s/n]: BiocManager::install("jpmam1/scalop")
## Update all/some/none? [a/s/n]:
## a
## trying URL 'https://cran.rstudio.com/bin/macosx/big-sur-x86_64/contrib/4.3/matrixStats_0.63.0.tgz'
## Content type 'application/x-gzip' length 653132 bytes (637 KB)
## ==================================================
## downloaded 637 KB
##
##
## The downloaded binary packages are in
## /var/folders/hj/1wvjfb692c3gswybcg8xdcwm0000gn/T//Rtmp6rXv9T/downloaded_packages
## > BiocManager::install("jpmam1/scalop")
## 'getOption("repos")' replaces Bioconductor standard repositories, see 'help("repositories", package = "BiocManager")' for details.
## Replacement repositories:
## CRAN: https://cran.rstudio.com/
## Bioconductor version 3.17 (BiocManager 1.30.20), R 4.3.0 (2023-04-21)
## Installing github package(s) 'jpmam1/scalop'
## Downloading GitHub repo jpmam1/scalop@HEAD
## These packages have more recent versions available.
## It is recommended to update all of them.
## Which would you like to update?
##
## 1: All
## 2: CRAN packages only
## 3: None
## 4: matrixStats (0.63.0 -> 1.0.0) [CRAN]
##
## Enter one or more numbers, or an empty line to skip updates: 1
## matrixStats (0.63.0 -> 1.0.0) [CRAN]
## Installing 1 packages: matrixStats
## trying URL 'https://cran.rstudio.com/bin/macosx/big-sur-x86_64/contrib/4.3/matrixStats_0.63.0.tgz'
## Content type 'application/x-gzip' length 653132 bytes (637 KB)
## ==================================================
## downloaded 637 KB
##
##
## The downloaded binary packages are in
## /var/folders/hj/1wvjfb692c3gswybcg8xdcwm0000gn/T//Rtmp6rXv9T/downloaded_packages
## ── R CMD build ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
## ✔ checking for file ‘/private/var/folders/hj/1wvjfb692c3gswybcg8xdcwm0000gn/T/Rtmp6rXv9T/remotes13284246ba83/jpmam1-scalop-ab9fe5c/DESCRIPTION’ ...
## ─ preparing ‘scalop’:
## ✔ checking DESCRIPTION meta-information ...
## ─ cleaning src
## ─ checking for LF line-endings in source and make files and shell scripts
## ─ checking for empty or unneeded directories
## ─ building ‘scalop_1.1.0.tar.gz’
##
## * installing *source* package ‘scalop’ ...
## ** using staged installation
## ** libs
## using C compiler: ‘Apple clang version 11.0.3 (clang-1103.0.32.62)’
## using SDK: ‘MacOSX10.15.sdk’
## clang -arch x86_64 -I"/Library/Frameworks/R.framework/Resources/include" -DNDEBUG -I'/Library/Frameworks/R.framework/Versions/4.3-x86_64/Resources/library/Rcpp/include' -I/opt/R/x86_64/include -fPIC -falign-functions=64 -Wall -g -O2 -c init.c -o init.o
## clang -arch x86_64 -I"/Library/Frameworks/R.framework/Resources/include" -DNDEBUG -I'/Library/Frameworks/R.framework/Versions/4.3-x86_64/Resources/library/Rcpp/include' -I/opt/R/x86_64/include -fPIC -falign-functions=64 -Wall -g -O2 -c nd.c -o nd.o
## clang -arch x86_64 -I"/Library/Frameworks/R.framework/Resources/include" -DNDEBUG -I'/Library/Frameworks/R.framework/Versions/4.3-x86_64/Resources/library/Rcpp/include' -I/opt/R/x86_64/include -fPIC -falign-functions=64 -Wall -g -O2 -c rowttests.c -o rowttests.o
## clang -arch x86_64 -dynamiclib -Wl,-headerpad_max_install_names -undefined dynamic_lookup -single_module -multiply_defined suppress -L/Library/Frameworks/R.framework/Resources/lib -L/opt/R/x86_64/lib -o scalop.so init.o nd.o rowttests.o -F/Library/Frameworks/R.framework/.. -framework R -Wl,-framework -Wl,CoreFoundation
## installing to /Library/Frameworks/R.framework/Versions/4.3-x86_64/Resources/library/00LOCK-scalop/00new/scalop/libs
## ** R
## ** data
## *** moving datasets to lazyload DB
## ** inst
## ** byte-compile and prepare package for lazy loading
## Warning: replacing previous import ‘AnnotationDbi::select’ by ‘dplyr::select’ when loading ‘scalop’
## ** help
## *** installing help indices
## ** building package indices
## ** installing vignettes
## ** testing if installed package can be loaded from temporary location
## Warning: replacing previous import ‘AnnotationDbi::select’ by ‘dplyr::select’ when loading ‘scalop’
## ** checking absolute paths in shared objects and dynamic libraries
## ** testing if installed package can be loaded from final location
## Warning: replacing previous import ‘AnnotationDbi::select’ by ‘dplyr::select’ when loading ‘scalop’
## ** testing if installed package keeps a record of temporary installation path
## * DONE (scalop)
## Old packages: 'matrixStats'
## Update all/some/none? [a/s/n]:
## a
## trying URL 'https://cran.rstudio.com/bin/macosx/big-sur-x86_64/contrib/4.3/matrixStats_0.63.0.tgz'
## Content type 'application/x-gzip' length 653132 bytes (637 KB)
## ==================================================
## downloaded 637 KB
##
##
## The downloaded binary packages are in
## /var/folders/hj/1wvjfb692c3gswybcg8xdcwm0000gn/T//Rtmp6rXv9T/downloaded_packages
## > BiocManager::install("jlaffy/infercna")
## 'getOption("repos")' replaces Bioconductor standard repositories, see 'help("repositories", package = "BiocManager")' for details.
## Replacement repositories:
## CRAN: https://cran.rstudio.com/
## Bioconductor version 3.17 (BiocManager 1.30.20), R 4.3.0 (2023-04-21)
## Installing github package(s) 'jlaffy/infercna'
## Downloading GitHub repo jlaffy/infercna@HEAD
## ── R CMD build ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
## ✔ checking for file ‘/private/var/folders/hj/1wvjfb692c3gswybcg8xdcwm0000gn/T/Rtmp6rXv9T/remotes13287d6716f0/jlaffy-infercna-98a8db8/DESCRIPTION’ ...
## ─ preparing ‘infercna’:
## ✔ checking DESCRIPTION meta-information ...
## ─ checking for LF line-endings in source and make files and shell scripts
## ─ checking for empty or unneeded directories
## NB: this package now depends on R (>= 3.5.0)
## WARNING: Added dependency on R >= 3.5.0 because serialized objects in
## serialize/load version 3 cannot be read in older versions of R.
## File(s) containing such objects:
## ‘infercna/data-raw/genes.rda’
## ─ building ‘infercna_1.0.0.tar.gz’
##
## * installing *source* package ‘infercna’ ...
## ** using staged installation
## ** R
## ** data
## *** moving datasets to lazyload DB
## ** byte-compile and prepare package for lazy loading
## Warning: replacing previous import ‘AnnotationDbi::select’ by ‘dplyr::select’ when loading ‘scalop’
## Note: ... may be used in an incorrect context
## ** help
## *** installing help indices
## *** copying figures
## ** building package indices
## ** installing vignettes
## ** testing if installed package can be loaded from temporary location
## Warning: replacing previous import ‘AnnotationDbi::select’ by ‘dplyr::select’ when loading ‘scalop’
## ** testing if installed package can be loaded from final location
## Warning: replacing previous import ‘AnnotationDbi::select’ by ‘dplyr::select’ when loading ‘scalop’
## ** testing if installed package keeps a record of temporary installation path
## * DONE (infercna)
## Old packages: 'matrixStats'
## Update all/some/none? [a/s/n]:
## a
## trying URL 'https://cran.rstudio.com/bin/macosx/big-sur-x86_64/contrib/4.3/matrixStats_0.63.0.tgz'
## Content type 'application/x-gzip' length 653132 bytes (637 KB)
## ==================================================
## downloaded 637 KB
##
##
## The downloaded binary packages are in
## /var/folders/hj/1wvjfb692c3gswybcg8xdcwm0000gn/T//Rtmp6rXv9T/downloaded_packages
##
## This saves the user from needing to uninstall (or from having to install) important functionality contained in xcode / GCC.
## For completeness and context, here is my sessionInfo():
## sessionInfo()
## R version 4.3.0 (2023-04-21)
## Platform: x86_64-apple-darwin20 (64-bit)
## Running under: macOS Big Sur 11.7
##
## Matrix products: default
## BLAS: /System/Library/Frameworks/Accelerate.framework/Versions/A/Frameworks/vecLib.framework/Versions/A/libBLAS.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.3-x86_64/Resources/lib/libRlapack.dylib; LAPACK version 3.11.0
##
## Random number generation:
## RNG: Mersenne-Twister
## Normal: Inversion
## Sample: Rounding
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## time zone: America/Detroit
## tzcode source: internal
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## loaded via a namespace (and not attached):
## [1] SummarizedExperiment_1.30.1 gtable_0.3.3 ggplot2_3.4.2 htmlwidgets_1.6.2 remotes_2.4.2
## [6] ggrepel_0.9.3 processx_3.8.1 Biobase_2.60.0 lattice_0.21-8 callr_3.7.3
## [11] vctrs_0.6.2 tools_4.3.0 ps_1.7.5 bitops_1.0-7 generics_0.1.3
## [16] stats4_4.3.0 curl_5.0.0 tibble_3.2.1 fansi_1.0.4 pkgconfig_2.0.3
## [21] Matrix_1.5-4.1 data.table_1.14.8 desc_1.4.2 S4Vectors_0.38.1 lifecycle_1.0.3
## [26] GenomeInfoDbData_1.2.10 compiler_4.3.0 munsell_0.5.0 GenomeInfoDb_1.36.0 htmltools_0.5.5
## [31] RCurl_1.98-1.12 lazyeval_0.2.2 plotly_4.10.1 pillar_1.9.0 crayon_1.5.2
## [36] tidyr_1.3.0 DelayedArray_0.26.3 tidyselect_1.2.0 locfit_1.5-9.7 digest_0.6.31
## [41] dplyr_1.1.2 purrr_1.0.1 rprojroot_2.0.3 fastmap_1.1.1 grid_4.3.0
## [46] colorspace_2.1-0 cli_3.6.1 magrittr_2.0.3 S4Arrays_1.0.4 pkgbuild_1.4.0
## [51] utf8_1.2.3 withr_2.5.0 prettyunits_1.1.1 scales_1.2.1 XVector_0.40.0
## [56] httr_1.4.6 matrixStats_0.63.0 GenomicRanges_1.52.0 IRanges_2.34.0 viridisLite_0.4.2
## [61] rlang_1.1.1 Rcpp_1.0.10 glue_1.6.2 BiocManager_1.30.20 BiocGenerics_0.46.0
## [66] rstudioapi_0.14 jsonlite_1.8.4 R6_2.5.1 MatrixGenerics_1.12.0 zlibbioc_1.46.0 |Vincent Laufer |2023-06-02 12:57:59Z |705 | 26| 10| 0|
## | 76387096|First, Arima() does not fit the model given in your equation. It fits a regression with ARIMA errors like this:
## x_{t} = \alpha_{0} + \beta_{1}z_{t} + \eta_{t}
##
## where
## \eta_t = \phi_{1}\eta_{t-1}+\varepsilon_{t}.
##
## We can rearrange this to give
## x_{t} = (1-\phi_{1})\alpha_{0} + \phi_{1}x_{t-1} + \beta_{1}z_{t} - \beta_{1}\phi_{1}z_{t-1} + \varepsilon_{t}
##
## This explains the major differences in the two results.
## But even if you specified exactly the same model, they would give slightly different results because Arima() uses the true likelihood whereas glm() will use a conditional likelihood because of the initial missing value due to the lag() function.
## See https://robjhyndman.com/hyndsight/arimax/ for a discussion of the different model specifications. |Rob Hyndman |2023-06-03 07:26:18Z |30056 | 85| 72| 7|
## | 76386904|I think you were on the right track with pivot_longer. Applying it to your example data, we get this:
## library(tidyr)
## library(dplyr)
##
## treedata01 %>%
## pivot_longer(starts_with("Species"),
## names_to = "Species")
##
## # A tibble: 120 × 8
## Date Field TreeNumber TreeType TreeAge Sample.Location Species value
## <chr> <chr> <int> <chr> <chr> <chr> <chr> <int>
## 1 4/14/2023 Field 3 1 Oak Old Top SpeciesA 80
## 2 4/14/2023 Field 3 1 Oak Old Top SpeciesB 7
## 3 4/14/2023 Field 3 1 Oak Old Top SpeciesC 17
## 4 4/14/2023 Field 3 1 Oak Old Top SpeciesD 19
## 5 4/14/2023 Field 3 1 Oak Old Middle SpeciesA 1
## 6 4/14/2023 Field 3 1 Oak Old Middle SpeciesB 16
## 7 4/14/2023 Field 3 1 Oak Old Middle SpeciesC 23
## 8 4/14/2023 Field 3 1 Oak Old Middle SpeciesD 5
## 9 4/14/2023 Field 3 1 Oak Old Bottom SpeciesA 50
## 10 4/14/2023 Field 3 1 Oak Old Bottom SpeciesB 6
## # ℹ 110 more rows
##
## Now, you say you want "average moths per tree", but perhaps you need to group on other variables too, such as Date or Field ? In any case, it's as simple as adding those variables to dplyr::group_by().
## So for example: average moths per tree by Date and Field, assuming that TreeNumber is a unique ID for a tree. In this code I omit rows with NA values:
## treedata01 %>%
## na.omit() %>%
## pivot_longer(starts_with("Species"),
## names_to = "Species") %>%
## group_by(Date, Field, TreeNumber, Species) %>%
## summarise(meanVal = mean(value)) %>%
## ungroup()
##
## # A tibble: 8 × 5
## Date Field TreeNumber Species meanVal
## <chr> <chr> <int> <chr> <dbl>
## 1 4/14/2023 Field 3 1 SpeciesA 27
## 2 4/14/2023 Field 3 1 SpeciesB 7.83
## 3 4/14/2023 Field 3 1 SpeciesC 8.5
## 4 4/14/2023 Field 3 1 SpeciesD 21.8
## 5 4/14/2023 Field 3 2 SpeciesA 0
## 6 4/14/2023 Field 3 2 SpeciesB 0
## 7 4/14/2023 Field 3 2 SpeciesC 0
## 8 4/14/2023 Field 3 2 SpeciesD 0 |neilfws |2023-06-02 05:52:15Z |32150 | 63| 50| 5|
## | 76386843|Use dplyr::pick(), which lets you “pick” variables from the current dataframe environment, rather than dat[c("vars")], which references the dat object before any transformations.
## library(dplyr)
##
## dat %>%
## rename(newX = x) %>%
## mutate(z = rowSums(pick(newX, y) == 1))
##
## newX y z
## 1 0 0 0
## 2 1 0 1
## 3 0 0 0
## 4 0 1 1
## 5 1 1 2 |zephryl |2023-06-02 02:57:05Z |14244 | 29| 11| 3|
## | 76386786|It works with text outside aes (that surprises me):
## p <-
## ggplot(data = mtcars, aes(x = disp), text = glue::glue('{after_stat({count})}')) +
## geom_histogram()
##
## plotly::ggplotly(p) |Stéphane Laurent |2023-06-02 04:36:20Z |72359 | 223| 115| 14|
## | 76386620|consider doing:
## apply(methyl_dat[-1], 1, \(x)fisher.test(matrix(x,2)), simplify = F) |Onyambu |2023-06-02 01:43:50Z |64533 | 53| 23| 3|
## | 76386620|Adding a dplyr solution:
## library(dplyr, warn.conflicts = FALSE)
## data.frame(loci = c("site1", "site2", "site3", "site4"),
## Methy.tumor = c(50, 5, 60, 12),
## UnMethy.tumor = c(60, 0, 65, 5),
## Methy.Normal = c(13, 5, 22, 3),
## UnMethy.Normal = c(86, 0, 35, 3) ) %>%
## group_by(loci) %>%
## summarise(
## p_val = fisher.test(matrix(c_across(everything()), 2))$p.val
## )
## #> # A tibble: 4 × 2
## #> loci p_val
## #> <chr> <dbl>
## #> 1 site1 0.000000392
## #> 2 site2 1
## #> 3 site3 0.263
## #> 4 site4 0.621
##
## Created on 2023-06-01 with reprex v2.0.2 |Baraliuh |2023-06-02 02:13:39Z |1919 | 11| 5| 0|
## | 76386409|This works for me:
## library(MASS)
## library(gam)
## #> Loading required package: splines
## #> Loading required package: foreach
## #> Loaded gam 1.22-2
## names_Boston <- names(Boston[,1:3])
## f1 <- as.formula(paste0('medv ~', paste0('s(', names_Boston, ')', collapse = '+')))
## gam(f1, data = Boston)
## #> Call:
## #> gam(formula = f1, data = Boston)
## #>
## #> Degrees of Freedom: 505 total; 493.0002 Residual
## #> Residual Deviance: 26249.62
##
## Created on 2023-06-01 with reprex v2.0.2
## But column 4 cannot be smoothed due to:
## A smoothing variable encountered with 3 or less unique values; at least 4 needed |Baraliuh |2023-06-02 00:31:40Z |1919 | 11| 5| 6|
## | 76386409|You may try
## names_Boston <- names(Boston[,1:3])
## f1 <- paste("medv ~", paste0(sprintf("s(%s)", names_Boston), collapse = " + "))
##
## f1
## [1] "medv ~ s(crim) + s(zn) + s(indus)"
##
##
## gam(as.formula(f1), data = Boston)
##
## Call:
## gam(formula = as.formula(f1), data = Boston)
##
## Degrees of Freedom: 505 total; 493.0002 Residual
## Residual Deviance: 26249.62
##
## gam(medv ~ s(crim) + s(zn) + s(indus), data = Boston)
##
## Call:
## gam(formula = medv ~ s(crim) + s(zn) + s(indus), data = Boston)
##
## Degrees of Freedom: 505 total; 493.0002 Residual
## Residual Deviance: 26249.62 |Park |2023-06-02 00:26:53Z |14666 | 29| 9| 0|
## | 76386356|In R, there are many factors which can affect performance. In order for others to provide 'good' answers, it would be helpful if you could edit your question to include your current runtime, the sort of speed-up you expect, and a minimal reproducible example (alternatively, describe any differences between your actual data and my example below, e.g. how long is your target_codes vector, what are the dimensions of your dataframes, etc). Also, how many dataframes are you applying this to, and have you looked at other alternatives, e.g. data.table, or other languages, e.g. awk?
## Your code didn't run 'as is' on my example dataset, so I ran an altered version. I also tested PCRE regex (perl = TRUE in the grep) which can improve speed for reasons:
## library(tidyverse)
##
## match_targets_dplyr <- function(df, target_codes){
## pattern = paste0("^", target_codes, collapse = "|")
## df <- filter(df, if_any(everything(), ~grepl(pattern, .x)))
## return(df)
## }
##
## match_targets_dplyr_perl <- function(df, target_codes){
## pattern = paste0("^", target_codes, collapse = "|")
## df <- filter(df, if_any(everything(), ~grepl(pattern, .x, perl = TRUE)))
## return(df)
## }
##
## match_targets_base <- function(df, target_codes){
## pattern = paste0("^", target_codes, collapse = "|")
## df <- df[rowSums(apply(df, 2, \(x) grepl(pattern, x))) > 0, , drop = FALSE]
## return(df)
## }
##
## match_targets_base_perl <- function(df, target_codes){
## pattern = paste0("^", target_codes, collapse = "|")
## df <- df[rowSums(apply(df, 2, \(x) grepl(pattern, x, perl = TRUE))) > 0, , drop = FALSE]
## return(df)
## }
##
## # grab some data
## df <- read_csv("https://raw.github.com/VladAluas/Text_Analysis/master/Datasets/Text_review.csv")
## #> Rows: 433 Columns: 3
## #> ── Column specification ────────────────────────────────────────────────────────
## #> Delimiter: ","
## #> chr (3): Model, Segment, Text
## #>
## #> ℹ Use `spec()` to retrieve the full column specification for this data.
## #> ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## df_large <- df
##
## # the "Text" field has some non-standard characters; remove them
## df_large$Text <- iconv(df_large$Text, from = "UTF-8", to = "ASCII", sub = "")
##
## # select some target codes
## target_codes = c("Perform", "Ca", "Net")
##
## # check you get the same answer from all of the functions
## df2 <- match_targets_dplyr(df_large, target_codes)
## df3 <- match_targets_dplyr_perl(df_large, target_codes)
## df4 <- match_targets_base(df_large, target_codes)
## df5 <- match_targets_base_perl(df_large, target_codes)
## all.equal(df2, df3, df4, df5, check.attributes = FALSE)
## #> [1] TRUE
##
## # increase the size of the data to ~28mil rows
## repeat{df_large <- df_large %>% bind_rows(df_large); if(NROW(df_large) > 2.8e7) {break}}
##
## # benchmark the functions on the 28mil rows
## library(microbenchmark)
## res <- microbenchmark(match_targets_dplyr(df_large, target_codes),
## match_targets_dplyr_perl(df_large, target_codes),
## match_targets_base(df_large, target_codes),
## match_targets_base_perl(df_large, target_codes),
## times = 2)
## autoplot(res)
##
##
## Created on 2023-06-02 with reprex v2.0.2
## It's hard to see what's going on in the reprex image; here it is with the points highlighted:
##
## TL,DR: dplyr is a bit faster, and PCRE was much faster using this example dataset. |jared_mamrot |2023-06-02 05:04:36Z |21404 | 43| 20| 4|
## | 76386347|Use which(sapply()) to get indices of numeric columns, then data.table::set() to transform:
## library(data.table)
##
## logDT <- DT
##
## for (j in which(sapply(logDT, is.numeric))) {
## set(logDT, j = j, value = log(logDT[[j]], 2) + 1)
## }
##
## logDT
##
## x v1 v2
## 1: b 0.148809 -0.2669889
## 2: b NaN 1.2980794
## 3: b 1.827952 -1.0789923
## 4: a -1.416422 NaN
## 5: a 1.192227 1.1442347
##
## Example data:
## set.seed(13)
##
## DT <- data.table(x = c("b","b","b","a","a"), v1 = rnorm(5), v2 = rnorm(5)) |zephryl |2023-06-02 02:12:34Z |14244 | 29| 11| 3|
## | 76386162|replace(df1, is.na(df1), df2[is.na(df1)])
##
## X1 X2 X3 X4 X5
## R1 80 98 150 599 650
## R2 90 95 254 725 NA
## R3 93 96 540 650 NA
##
## library(tidyverse)
## map2_df(df1, df2, coalesce)
##
## # A tibble: 3 × 5
## X1 X2 X3 X4 X5
## <int> <int> <int> <int> <int>
## 1 80 98 150 599 650
## 2 90 95 254 725 NA
## 3 93 96 540 650 NA |Onyambu |2023-06-01 22:52:57Z |64533 | 53| 23| 3|
## | 76386162|Here is a dplyr solution:
## library(dplyr)
##
## new_data_one %>%
## mutate(across(everything(), ~ coalesce(., new_data_two[[cur_column()]])))
##
## X1 X2 X3 X4 X5
## R1 80 98 150 599 650
## R2 90 95 254 725 NA
## R3 93 96 540 650 NA |TarJae |2023-06-01 23:15:19Z |70408 | 62| 18| 6|
## | 76386104|There might be a cleaner option which I haven't found in the docs but one option to fix the order of the stack would be to manipulate the ggplot object returned by likert.bar.plot directly and reverse the order of the stack by setting the position for the geom_col layer aka layer no. 1 to position_stack(reverse = TRUE):
## library(likert)
## #> Loading required package: ggplot2
## #> Loading required package: xtable
## library(tidyverse)
##
## n <- 3000
##
## set.seed(123)
##
## some_made_up_data <- data.frame(
## Q1a = as.factor(sample(c(1, 2, 3, 4, 5), n, replace = TRUE, prob = c(.03, .07, .2, .4, .3))),
## Q1b = as.factor(sample(c(1, 2, 3, 4, 5), n, replace = TRUE, prob = c(.02, .1, .2, .3, .3))),
## Q1c = as.factor(sample(c(1, 2, 3, 4, 5), n, replace = TRUE, prob = c(.05, .2, .2, .4, .2))),
## group = as.factor(sample(c("g1", "g2", "g3", "g4", "g5"), n, replace = TRUE))
## )
##
## L3 <- likert(some_made_up_data[, 1, drop = FALSE], grouping = some_made_up_data$group) %>%
## plot(
## type = "bar",
## centered = F
## )
##
## L3$layers[[1]]$position <- position_stack(reverse = TRUE)
##
## L3 |stefan |2023-06-01 23:02:10Z |84193 | 50| 25| 6|
## | 76386043|just do:
## p_values <- apply(df[-1], 1, function(row) {
## t_test_result <- t.test(row[1:5], row[6:12])
## t_test_result$p.value
## })
##
## p_values
## [1] 0.1425172 0.6840726 0.3266262 |Onyambu |2023-06-01 22:26:45Z |64533 | 53| 23| 3|
## | 76385903|This one-liner works:
## randomTable <- ogTable[, c(1:2, sample(3:ncol(ogTable), 50)), with = FALSE]
##
## A reprex:
## library(data.table)
## set.seed(1)
## test <- data.table(iris)
## randomTable <- test[, c(1:2, sample(3:ncol(test), 2)), with = FALSE]
##
## head(randomTable)
## Sepal.Length Sepal.Width Petal.Length Petal.Width
## 1 5.1 3.5 1.4 0.2
## 2 4.9 3.0 1.4 0.2
## 3 4.7 3.2 1.3 0.2
## 4 4.6 3.1 1.5 0.2
## 5 5.0 3.6 1.4 0.2
## 6 5.4 3.9 1.7 0.4 |Leroy Tyrone |2023-06-01 22:18:27Z |1206 | 23| 14| 6|
## | 76385903|Update after clarification:
## library(dplyr)
## set.seed(42)
##
## # Select 3 columns from mtcars randomly keeping the first two constant
## selected_columns <- sample(colnames(mtcars)[-c(1, 2)], 3)
##
## # Now combine the constant columns with the randomly select using `all_of()` function
## mtcars %>%
## select(mpg, cyl, all_of(selected_columns))
##
## first answer:
## sample_n from dplyr is used for sampling rows (e.g. observations). See ?sample_n: sample_n() and sample_frac() have been superseded in favour of slice_sample().
## We could do it this way to randomly select 50 columns:
## randomTable <- ogTable[, sample(ncol(ogTable), 50)] |TarJae |2023-06-01 21:50:50Z |70408 | 62| 18| 0|
## | 76385735|I'm not sure how to make this happen with an "out-of-the-box" function, though I don't doubt that it's possible. First, let's make the data then I'll explain the solution.
## library(dplyr)
## library(lubridate)
## df <- data.frame("date_index"=c("20230104", "20230104", "20230104", "20230106", "20230106", "20230106"),
## "f_date"=c("20230201", "20230203", "20230210", "20230201", "20230203", "20230210"),
## "value" = c(1.50, 2.00, 3.25, 4.00, 3.00, 2.50)) %>%
## mutate(date_index=ymd(date_index), f_date=ymd(f_date))
##
## I wrote a function that does an interpolation with a piecewise-linear regression model that is just identified - it has as many parameters as there are non-missing values of the value variable. It then uses that model to generate predictions using the observed values of value as knots.
## interp_fun <- function(x,y){
## pwl <- function(x, k)sapply(k, function(K)ifelse(x >= K, x-K, 0))
## w <- which(!is.na(y))
## if(length(w) > 2){
## w <- w[-c(1, length(w))]
## kts <- x[w]
## mod <- lm(y ~ x + pwl(x, kts), na.action=na.exclude)
## ifelse(is.na(y), predict(mod, newdata=data.frame(x=x)), y)
## }else{
## mod <- lm(y ~ x, na.action=na.exclude)
## ifelse(is.na(y), predict(mod, newdata=data.frame(x=x)), y)
## }
## }
##
## With the interpolation function in hand, you can group the data by date_index, then generate a new sequence of dates from the minimum to the maximum of f_date and then merge that back into the original data. You can group by date_index and then apply the interpolation function to f_date and value. The result appears as though it's the one you want. This should work regardless of how many "interior" observed values you have between the observed endpoints.
##
## df %>%
## group_by(date_index) %>%
## arrange(f_date, .by_group = TRUE) %>%
## reframe(f_date = seq(from = min(f_date),
## to = max(f_date),
## by = "1 day")) %>%
## left_join(df) %>%
## group_by(date_index) %>%
## mutate(value = interp_fun(f_date, value))
## #> Joining with `by = join_by(date_index, f_date)`
## #> # A tibble: 20 Ã 3
## #> # Groups: date_index [2]
## #> date_index f_date value
## #> <date> <date> <dbl>
## #> 1 2023-01-04 2023-02-01 1.5
## #> 2 2023-01-04 2023-02-02 1.75
## #> 3 2023-01-04 2023-02-03 2
## #> 4 2023-01-04 2023-02-04 2.18
## #> 5 2023-01-04 2023-02-05 2.36
## #> 6 2023-01-04 2023-02-06 2.54
## #> 7 2023-01-04 2023-02-07 2.71
## #> 8 2023-01-04 2023-02-08 2.89
## #> 9 2023-01-04 2023-02-09 3.07
## #> 10 2023-01-04 2023-02-10 3.25
## #> 11 2023-01-06 2023-02-01 4
## #> 12 2023-01-06 2023-02-02 3.50
## #> 13 2023-01-06 2023-02-03 3
## #> 14 2023-01-06 2023-02-04 2.93
## #> 15 2023-01-06 2023-02-05 2.86
## #> 16 2023-01-06 2023-02-06 2.79
## #> 17 2023-01-06 2023-02-07 2.71
## #> 18 2023-01-06 2023-02-08 2.64
## #> 19 2023-01-06 2023-02-09 2.57
## #> 20 2023-01-06 2023-02-10 2.5
##
## Created on 2023-06-01 with reprex v2.0.2 |DaveArmstrong |2023-06-01 22:35:33Z |17287 | 25| 11| 18|
## | 76385735|Here is a one liner using the tidyr::complete() function and the approx()
## library(dplyr)
## library(tidyr)
## library(lubridate)
##
## df %>% group_by(date_index) %>%
## complete(f_date=full_seq(f_date, 1)) %>%
## mutate(value=approx(f_date[!is.na(.$value)], y=value[!is.na(.$value)], xout=f_date, method = "linear")$y)
##
## # A tibble: 20 Ã 3
## # Groups: date_index [2]
## date_index f_date value
## <date> <date> <dbl>
## 1 2023-01-04 2023-02-01 1.5
## 2 2023-01-04 2023-02-02 1.75
## 3 2023-01-04 2023-02-03 2
## 4 2023-01-04 2023-02-04 2.18
## 5 2023-01-04 2023-02-05 2.36
## 6 2023-01-04 2023-02-06 2.54
## 7 2023-01-04 2023-02-07 2.71
## 8 2023-01-04 2023-02-08 2.89
## 9 2023-01-04 2023-02-09 3.07
## 10 2023-01-04 2023-02-10 3.25
## 11 2023-01-06 2023-02-01 4
## 12 2023-01-06 2023-02-02 3.5
## 13 2023-01-06 2023-02-03 3
## 14 2023-01-06 2023-02-04 2.93
## 15 2023-01-06 2023-02-05 2.86
## 16 2023-01-06 2023-02-06 2.79
## 17 2023-01-06 2023-02-07 2.71
## 18 2023-01-06 2023-02-08 2.64
## 19 2023-01-06 2023-02-09 2.57
## 20 2023-01-06 2023-02-10 2.5
##
## The complete function is a handy to have and will fill-in the missing values in a given sequence, in this case in 1 day intervals.
## Once the missing dates are define, then simply use the approx function to interpolate for the NA values. |Dave2e |2023-06-01 23:02:51Z |21677 | 48| 41| 0|
## | 76385735|Here is my take on it using summarise and unnest:
## library(dplyr, warn.conflicts = FALSE)
## library(lubridate, warn.conflicts = FALSE)
## library(tidyr)
## interpolator <- function(x, y) {
## date_first <- first(x)
## date_last <- last(x)
## interpolant <- approxfun(x = x, y = y)
## date_seq <- seq.Date(ymd(date_first), ymd(date_last), by = 1)
## tibble(
## f_date = date_seq, value = interpolant(date_seq)
## )
## }
##
## df <- data.frame("date_index"=c("20230104", "20230104", "20230104", "20230106", "20230106", "20230106"),
## "f_date"=c("20230201", "20230203", "20230210", "20230201", "20230203", "20230210"),
## "value" = c(1.50, 2.00, 3.25, 4.00, 3.00, 2.50)) %>%
## mutate(date_index=ymd(date_index), f_date=ymd(f_date))
##
## df %>%
## group_by(date_index) %>%
## arrange(f_date) %>%
## summarise(
## tmp = list(interpolator(f_date, value))
## ) %>%
## unnest(tmp)
## #> # A tibble: 20 Ã 3
## #> date_index f_date value
## #> <date> <date> <dbl>
## #> 1 2023-01-04 2023-02-01 1.5
## #> 2 2023-01-04 2023-02-02 1.75
## #> 3 2023-01-04 2023-02-03 2
## #> 4 2023-01-04 2023-02-04 2.18
## #> 5 2023-01-04 2023-02-05 2.36
## #> 6 2023-01-04 2023-02-06 2.54
## #> 7 2023-01-04 2023-02-07 2.71
## #> 8 2023-01-04 2023-02-08 2.89
## #> 9 2023-01-04 2023-02-09 3.07
## #> 10 2023-01-04 2023-02-10 3.25
## #> 11 2023-01-06 2023-02-01 4
## #> 12 2023-01-06 2023-02-02 3.5
## #> 13 2023-01-06 2023-02-03 3
## #> 14 2023-01-06 2023-02-04 2.93
## #> 15 2023-01-06 2023-02-05 2.86
## #> 16 2023-01-06 2023-02-06 2.79
## #> 17 2023-01-06 2023-02-07 2.71
## #> 18 2023-01-06 2023-02-08 2.64
## #> 19 2023-01-06 2023-02-09 2.57
## #> 20 2023-01-06 2023-02-10 2.5
##
## Created on 2023-06-01 with reprex v2.0.2 |Baraliuh |2023-06-01 22:47:29Z |1919 | 11| 5| 0|
## | 76385712|Here's an example of boxplots with daily data that is in POSIXct:
## set.seed(42)
## df <- data.frame(date = sample(lubridate::ymd_h(c(
## 2023040100, 2023050800, 2023051500, 2023051600)), 100, replace = TRUE),
## mgL = rnorm(100, mean = 29))
##
## The default spacing for boxplots on a continuous axis will find a resolution that fits the space.
## ggplot(df, aes(date, mgL, group = date)) +
## geom_boxplot()
##
##
## If we specify width = 1, the boxes will be 1 unit wide, which in POSIXct is 1 second, resulting in boxes that are far too narrow.
## ggplot(df, aes(date, mgL, group = date)) +
## geom_boxplot(width = 1)
##
##
## Here with 7-day spacing, the first two bars are one week wide, but the last two (which occur one day apart) are squeezed together with narrower bars.
## ggplot(df, aes(date, mgL, group = date)) +
## geom_boxplot(width = 60*60*24*7)
##
##
## Or we could convert the x axis to Date, where one unit is a day. With width 1, the last two bars will be lined up with no space between them. Any wider and ggplot will squeeze the overlapping ones to avoid an overlap.
## ggplot(df, aes(as.Date(date), mgL, group = date)) +
## geom_boxplot(width = 1) |Jon Spring |2023-06-01 23:09:56Z |52995 | 52| 35| 4|
## | 76385712|The issue is the datetime Class. Try the whole process by leaving date as character the width should work.
## this explanation is taken directly from @Jon Spring comments:
## "boxplot width 1 will make the boxes 1 unit wide. With POSIXct in the x axis, 1 unit is 1 second, which is far too narrow for this data. Try width = 60 * 60 * 24 * 5 to make them 5 days wide, for instance. Tricky b/c I see some are closer than that so you'll have overlaps"
## library(lubridate)
## library(dplyr)
##
## df %>%
## mutate(date = ymd_hms(date)) %>%
## ggplot(aes(x = date, y = Chloride_mgL, group = date)) +
## geom_boxplot(width = 2, outlier.shape = NA, color = "dark blue") +
## theme_minimal() +
## labs(y ="Chloride (mg/L)")
##
##
##
## library(dplyr)
## df %>%
## ggplot(aes(x = date, y = Chloride_mgL, group = date)) +
## geom_boxplot(width = 2, outlier.shape = NA, color = "dark blue") +
## theme_minimal() +
## labs(y = "Chloride (mg/L)") |TarJae |2023-06-01 21:35:51Z |70408 | 62| 18| 6|
## | 255697|I've only come across both R and the Dirichlet distribution in passing, so I hope I'm not too much off the mark.
##
## This mailing list message seems to answer your question:
##
##
## Scrolling through the results of
## RSiteSearch("dirichlet") suggests some useful tools
## in the VGAM package. The gtools package and
## MCMC packages also have ddirichlet() functions
## that you could use to construct a (negative log) likelihood
## function and optimize with optim/nlmin/etc.
##
##
## The deal, DPpackage and mix packages also may or may not provide what you need.
##
## Then again, these are all still CRAN packages, so I'm not sure if you already found these and found them unsuitable.
##
## As for searching for R, the R project site itself already provides a few links on its search page. |mercator |2008-11-01 19:29:54Z |28190 | 72| 63| 8|
## | 127137|Why not look at www.r-project.org under documentation and read at least the introduction? The language is sufficiently different from what you're used to that just looking at code samples won't be enough for you to pick it up. (At least, not beyond basic calculator-like functionality.)
##
## If you want to look a bit deeper, you might want to look at CRAN: an online collection of R modules with source code: cran.r-project.org |zvrba |2008-09-24 13:33:48Z |24102 | 65| 54| 3|
## | 127137|I just found this question and thought I would add a few resources to it. I really like the Quick-R site:
##
## http://www.statmethods.net/
##
## Muenchen has written a book about using R if you come from SAS or SPSS. Originally it was an 80 page online doc that Springer encouraged him to make a 400+ page book out of. The original short form as well as the book are here:
##
## http://rforsasandspssusers.com/
##
## You've probably already seen these, but worth listing:
## http://cran.r-project.org/doc/manuals/R-intro.pdf
## http://cran.r-project.org/doc/contrib/Owen-TheRGuide.pdf
## http://cran.r-project.org/doc/contrib/Kuhnert+Venables-R_Course_Notes.zip
##
## I don't want to sound like a trite RTFM guy, but the help files generally have great short snips of working code as examples. I'm no R pro so I end up having to deconstruct the examples to understand them. That process, while tedious, is really useful.
##
## Good luck!
##
## EDIT: well I hesitated to be self linking (it feels a bit masturbatory) but here's my own list of R resources with descriptions and comments on each: http://www.cerebralmastication.com/?page_id=62 |JD Long |2008-11-24 15:01:37Z |59385 | 293| 201| 57|
## | 127137|The Rosetta Code project shows R compared to other languages. |Gregg Lind |2008-09-26 19:51:12Z |20550 | 81| 66| 15|
## | 127137|How about CRAN? You've got over a thousand packages of code to choose from. |hadley |2008-09-25 11:45:38Z |101442 | 244| 183| 31|
## | 127137|The simplest way of seeing code, is to
##
## install R
## type "help.start()" or look at online documentation, to get names of functions
## type the function name at the prompt
## This will print the source code right at the prompt, and illustrate all manner of odd and interesting syntax corners. |Gregg Lind |2008-09-25 13:14:20Z |20550 | 81| 66| 15|
## | 127137|The Learning R blog has a lot of good examples. Lately, the author has been doing a visualization series, comparing Lattice and ggplot2. |ars |2009-07-26 09:29:21Z |119591 | 134| 145| 23|
## | 127137|It is hard to google r, because of it being too short a name. Try http://rseek.org/, which provides an r-customized Google search instead. Search on examples, code in repositories, etc. |Chang Chung |2009-02-22 04:49:50Z |2307 | 16| 17| 5|
## | 127137|Some simple examples can be found at Mathesaurus - if you know e.g. Python or Matlab, look at the respective comparison charts to find the R idioms that correspond to your familiar idioms in the other language. |Jouni K. Seppänen |2009-03-26 18:03:47Z |42879 | 100| 71| 3|
## | 127137|I use the R Graph Gallery. It has been a lot of help on graphing itself. Lots of good examples.
##
## #R on Freenode has also been very useful. |amoeba |2009-05-04 20:36:26Z |3940 | 14| 21| 5|
## | 127137|http://had.co.nz/ggplot2/ has a lot of graphics with example code. And you only need one package to create almost every graph you need. |Thierry |2009-07-23 21:24:48Z |17999 | 66| 47| 56|
## | 127137|There is also the R Wiki which is slowly growing. |Dirk Eddelbuettel |2009-07-26 21:14:15Z |358804 | 722| 638| 16|
## | 127137|As you probably know, R and S are pretty similar (apart from the cost!).
##
## I use to use both, and I highly recommend S Poetry.
##
## I can also highly recommend the M.J. Crawley book, and the shorter Venables & Ripley one. |Galwegian |2008-09-24 13:37:30Z |41435 | 158| 111| 8|
## | 127137|here are links to the R project group on Linkedin. I put together this list of links and a lot of people have found it useful (some have also made very useful additions) |harshsinghal |2010-02-18 18:44:50Z |3710 | 32| 35| 22|
## | 127137|Use Google Code Search with command "lang:r" and your keyword(s) |waanders |2010-08-05 16:52:40Z |8847 | 101| 70| 11|
## | 127137|Steve McIntyre at http://www.climateaudit.org/ is a big fan of R and often posts working code.
##
## There is a scripts category, and the Statistics and R lists some other resources |Will Dean |2008-09-24 13:38:20Z |39029 | 118| 89| 0|
## | 103312|The readLines function will return a zero-length value when it reaches the EOF. |Ben Hoffstein |2008-09-19 16:20:30Z |101899 | 120| 104| 8|
## | 103312|Try checking the length of data returned by readBin:
##
## while (length(a <- readBin(f, 'int', n=1)) > 0) {
## # do something
## } |ars |2009-07-30 07:09:28Z |119591 | 134| 145| 23|
## | 102056|http://rseek.org is a great search engine for R manuals, mailing lists, and various websites. It's a Google syndicated search app with specialized UI. I always use it. |Brendan OConnor |2009-07-22 06:58:23Z |9564 | 25| 27| 3|
## | 102056|An update, several years later
##
## All the links you need are right here:
##
## https://stackoverflow.com/tags/r/info
##
## This was discussed on the R-Help mailing list recently.
##
## Some things mentioned there that haven't been covered here are:
##
## Using the RSiteSearch function, and the package of the same name.
## Using R-specific search engines. You mentioned RSeek and RSearch. You can also search the R mail archive, the help wiki, the task views, RForge, and Bioconductor among other places. |Richie Cotton |2009-07-09 12:18:03Z |117586 | 359| 245| 46|
## | 102056|Most of the time I find googling for R plus my searching term works fine. When it doesn't, I'll try using "R project", or adding CRAN, statistic or language to the search.
##
## Is there a particular topic that you're having problems searching for? |hadley |2008-09-20 23:02:51Z |101442 | 244| 183| 31|
## | 102056|A new CRAN package is extremely helpful for this: check out the "sos" package. |Shane |2009-09-12 18:02:34Z |98232 | 217| 223| 35|
## | 102056|I would just add, one great way to search for R script is to type your search term into google with "ext:r" at the end. This will return all files that have the R extension. For instance:
##
## If you wanted some high performance computing examples, this returns Russ Lenth's "R code used in Netflix analyses" from Luke Tierney and Kate Cowles "High Performance Computing in Statistics" course.
## If you wanted examples of bootstrapping, this returns many scripts, most of which look very relevant.
## I usually do my basic R searches with "r-project" at the beginning, since most people who refer to R in any great detail will usually also reference the site. |Shane |2009-10-16 13:25:10Z |98232 | 217| 223| 35|
## | 102056|Joining this discussion very late, but here is my preferred search string in Google: [R] followed by search string.
##
## For example:
##
## [R] lm finds several links to linear modelling in R
##
## The reason this works is that StackOverflow uses the [r] tag, and the R mailing lists also use [R]. |Andrie |2011-04-21 14:17:33Z |175506 | 494| 446| 47|
## | 102056|CRAN is the authoritative place to look for R material. |lindelof |2008-09-19 14:14:03Z |34324 | 139| 98| 31|
## | 102056|Search for "S-PLUS" instead. R and S-PLUS are siblings, but the latter is easier to search for. |John D. Cook |2008-10-18 19:56:51Z |29493 | 93| 67| 10|
## | 102056|I typically use r-seek.org, but you can "search exactly as is" with Google by putting a + immediately before R.
##
##
## By attaching a + immediately before a
## word (remember, don't add a space
## after the +), you are telling Google
## to match that word precisely as you
## typed it. Putting double quotes around
## a single word will do the same thing.
##
##
## For example: +R cbind |Joshua Ulrich |2011-04-21 15:36:52Z |172592 | 416| 336| 32|
## | 102056|google for "r language" (with the quotes) and then your search terms. |tzot |2008-09-19 14:11:48Z |91983 | 203| 140| 29|
## | 102056|Typing .R into Google search box instead of just R helps. |user185072 |2009-10-06 15:37:38Z |0 | 77| 56| 14|
## | 102056|Similar to @MikeKSmith's answer, type R+ into the search box |Stedy |2010-04-29 01:08:44Z |7319 | 130| 104| 21|
## | 102056|To find questions/answers on Stack Overflow, I always; go to Tags, type R, find the R tag and click on it.
##
## Jeff mentioned a better way to search for the R
## Tag on the podcast, but I've since deleted it. :-(
##
## Discussion aside, Stack Overflow (or one of the sister sites) would be a great resource for R users. The very high volume R-help email list could be reduced by sending Noobies like myself to specific places here. One confounding issue is that while the questions are mostly about the R language, they are often about the proper statistical test or algorithm for the problem.
##
## RWFarley |user149173 |2009-08-10 20:32:15Z |31 | 174| 122| 22|
## | 102056|You can use this site:
##
## http://www.dangoldstein.com/search_r.html,
## "Search the R Statistical Language".
##
## Has "R Multi-site search powered by Google" and "R Multi-
## site search powered by Rollyo".
##
## Note that it requires JavaScript to work (can be restricted
## to www.dangoldstein.com and google.com if your browser setup
## allows it - e.g. using NoScript in Firefox). |Peter Mortensen |2009-08-21 01:02:57Z |31021 | 49| 44| 13|
## | 102056|GitHub's advanced search with a language constraint can be useful. Try this: language:R lubridate for example. |David J. |2012-06-21 22:09:22Z |31310 | 53| 32| 9|
## | 102056|You could always search for "R stats", considering R is a statistical program.
##
## Edit:
## http://www.google.com/search?source=ig&hl=en&rlz=&q=R+stats&btnG=Google+Search
##
## The first page shows plenty of relevant results. |echoblaze |2008-09-19 14:05:20Z |11116 | 324| 236| 92|
## | 102056|Adding "site:r-project.org" will help narrow down the results to only things on the official project web site. YMMV. |David Locke |2008-09-19 15:07:01Z |17836 | 33| 27| 3|
## | 102056|How about "R statistical" or "R package"?
##
## Also, restrict your search to the domain cran.r-project.org. For example, searching for how to use ifelse in R:
##
## ifelse site:cran.r-project.org |Frank |2009-07-08 13:22:01Z |63722 | 2| 234| 48|
## | 102056|for your original question, i.e. how to search in google: one of my previous colleagues suggested to use keyword "r-help" instead of "r" together with your question when searching in google. It searches in the mailing list for answers. That always works for me. |HongboZhu |2010-05-18 09:02:23Z |4422 | 371| 0| 0|
## | 102056|When googling, "in R" works well instead of just "R". |Ophedia |2010-08-25 17:47:26Z |33 | 0| 0| 0|
## | 102056|Just type what you want to do, e.g. "R merge data frame" in google that works great! I don't read any materials, just use google as I type R code. It's just great!!! |Tomas |2011-07-23 09:22:31Z |57264 | 0| 0| 0|
## | 95007|You're understandably confused. That documentation is terrible. I had to go back to the paper its based on (Hyndman, R.J.; Fan, Y. (November 1996). "Sample Quantiles in Statistical Packages". American Statistician 50 (4): 361â365. doi:10.2307/2684934) to get an understanding. Let's start with the first problem.
##
## where 1 <= i <= 9, (j-m)/n <= p < (j-m+1)/ n, x[j] is the jth order statistic, n is the sample size, and m is a constant determined by the sample quantile type. Here gamma depends on the fractional part of g = np+m-j.
##
## The first part comes straight from the paper, but what the documentation writers omitted was that j = int(pn+m). This means Q[i](p) only depends on the two order statistics closest to being p fraction of the way through the (sorted) observations. (For those, like me, who are unfamiliar with the term, the "order statistics" of a series of observations is the sorted series.)
## Also, that last sentence is just wrong. It should read
##
## Here gamma depends on the fractional part of np+m, g = np+m-j
##
## As for m that's straightforward. m depends on which of the 9 algorithms was chosen. So just like Q[i] is the quantile function, m should be considered m[i]. For algorithms 1 and 2, m is 0, for 3, m is -1/2, and for the others, that's in the next part.
##
## For the continuous sample quantile types (4 through 9), the sample quantiles can be obtained by linear interpolation between the kth order statistic and p(k):
## p(k) = (k - alpha) / (n - alpha - beta + 1), where α and β are constants determined by the type. Further, m = alpha + p(1 - alpha - beta), and gamma = g.
##
## This is really confusing. What the documentation calls p(k) is not the same as the p from before. p(k) is the plotting position. In the paper, the authors write it as pk, which helps. Especially since in the expression for m, the p is the original p, and the m = alpha + p * (1 - alpha - beta). Conceptually, for algorithms 4-9, the points (pk, x[k]) are interpolated to get the solution (p, Q[i](p)). Each algorithm only differs in the algorithm for the pk.
## As for the last bit, R is just stating what S uses.
## The original paper gives a list of 6 "desirable properties for a sample quantile" function, and states a preference for #8 which satisfies all by 1. #5 satisfies all of them, but they don't like it on other grounds (it's more phenomenological than derived from principles). #2 is what non-stat geeks like myself would consider the quantiles and is what's described in wikipedia.
## BTW, in response to dreeves answer, Mathematica does things significantly differently. I think I understand the mapping. While Mathematica's is easier to understand, (a) it's easier to shoot yourself in the foot with nonsensical parameters, and (b) it can't do R's algorithm #2. (Here's Mathworld's Quantile page, which states Mathematica can't do #2, but gives a simpler generalization of all the other algorithms in terms of four parameters.) |AFoglia |2009-09-22 23:58:43Z |7950 | 50| 35| 3|
## | 95007|There are various ways of computing quantiles when you give it a vector, and don't have a known CDF.
##
## Consider the question of what to do when your observations don't fall on quantiles exactly.
##
## The "types" are just determining how to do that. So, the methods say, "use a linear interpolation between the k-th order statistic and p(k)".
##
## So, what's p(k)? One guy says, "well, I like to use k/n". Another guy says, "I like to use (k-1)/(n-1)" etc. Each of these methods have different properties that are better suited for one problem or another.
##
## The \alpha's and \beta's are just ways to parameterize the functions p. In one case, they're 1 and 1. In another case, they're 3/8 and -1/4. I don't think the p's are ever a constant in the documentation. They just don't always show the dependency explicitly.
##
## See what happens with the different types when you put in vectors like 1:5 and 1:6.
##
## (also note that even if your observations fall exactly on the quantiles, certain types will still use linear interpolation). |Baltimark |2008-09-18 18:49:09Z |9012 | 35| 36| 12|
## | 95007|I believe the R help documentation is clear after the revisions noted in @RobHyndman's comment, but I found it a bit overwhelming. I am posting this answer in case it helps someone move quickly through the options and their assumptions.
##
## To get a grip on quantile(x, probs=probs), I wanted to check out the source code. This too was trickier than I anticipated in R so I actually just grabbed it from a github repo that looked recent enough to run with. I was interested in the default (type 7) behavior, so I annotated that some, but didn't do the same for each option.
##
## You can see how the "type 7" method interpolates, step by step, both in the code and also I added a few lines to print some important values as it goes.
##
## quantile.default <-function(x, probs = seq(0, 1, 0.25), na.rm = FALSE, names = TRUE
## , type = 7, ...){
## if(is.factor(x)) { #worry about non-numeric data
## if(!is.ordered(x) || ! type %in% c(1L, 3L))
## stop("factors are not allowed")
## lx <- levels(x)
## } else lx <- NULL
## if (na.rm){
## x <- x[!is.na(x)]
## } else if (anyNA(x)){
## stop("missing values and NaN's not allowed if 'na.rm' is FALSE")
## }
## eps <- 100*.Machine$double.eps #this is to deal with rounding things sensibly
## if (any((p.ok <- !is.na(probs)) & (probs < -eps | probs > 1+eps)))
## stop("'probs' outside [0,1]")
##
## #####################################
## # here is where terms really used in default type==7 situation get defined
##
## n <- length(x) #how many observations are in sample?
##
## if(na.p <- any(!p.ok)) { # set aside NA & NaN
## o.pr <- probs
## probs <- probs[p.ok]
## probs <- pmax(0, pmin(1, probs)) # allow for slight overshoot
## }
##
## np <- length(probs) #how many quantiles are you computing?
##
## if (n > 0 && np > 0) { #have positive observations and # quantiles to compute
## if(type == 7) { # be completely back-compatible
##
## index <- 1 + (n - 1) * probs #this gives the order statistic of the quantiles
## lo <- floor(index) #this is the observed order statistic just below each quantile
## hi <- ceiling(index) #above
## x <- sort(x, partial = unique(c(lo, hi))) #the partial thing is to reduce time to sort,
## #and it only guarantees that sorting is "right" at these order statistics, important for large vectors
## #ties are not broken and tied elements just stay in their original order
## qs <- x[lo] #the values associated with the "floor" order statistics
## i <- which(index > lo) #which of the order statistics for the quantiles do not land on an order statistic for an observed value
##
## #this is the difference between the order statistic and the available ranks, i think
## h <- (index - lo)[i] # > 0 by construction
## ## qs[i] <- qs[i] + .minus(x[hi[i]], x[lo[i]]) * (index[i] - lo[i])
## ## qs[i] <- ifelse(h == 0, qs[i], (1 - h) * qs[i] + h * x[hi[i]])
## qs[i] <- (1 - h) * qs[i] + h * x[hi[i]] # This is the interpolation step: assemble the estimated quantile by removing h*low and adding back in h*high.
## # h is the arithmetic difference between the desired order statistic amd the available ranks
## #interpolation only occurs if the desired order statistic is not observed, e.g. .5 quantile is the actual observed median if n is odd.
## # This means having a more extreme 99th observation doesn't matter when computing the .75 quantile
##
##
## ###################################
## # print all of these things
##
## cat("floor pos=", c(lo))
## cat("\nceiling pos=", c(hi))
## cat("\nfloor values= ", c(x[lo]))
## cat( "\nwhich floors not targets? ", c(i))
## cat("\ninterpolate between ", c(x[lo[i]]), ";", c(x[hi[i]]))
## cat( "\nadjustment values= ", c(h))
## cat("\nquantile estimates:")
##
## }else if (type <= 3){## Types 1, 2 and 3 are discontinuous sample qs.
## nppm <- if (type == 3){ n * probs - .5 # n * probs + m; m = -0.5
## } else {n * probs} # m = 0
##
## j <- floor(nppm)
## h <- switch(type,
## (nppm > j), # type 1
## ((nppm > j) + 1)/2, # type 2
## (nppm != j) | ((j %% 2L) == 1L)) # type 3
##
## } else{
## ## Types 4 through 9 are continuous sample qs.
## switch(type - 3,
## {a <- 0; b <- 1}, # type 4
## a <- b <- 0.5, # type 5
## a <- b <- 0, # type 6
## a <- b <- 1, # type 7 (unused here)
## a <- b <- 1 / 3, # type 8
## a <- b <- 3 / 8) # type 9
## ## need to watch for rounding errors here
## fuzz <- 4 * .Machine$double.eps
## nppm <- a + probs * (n + 1 - a - b) # n*probs + m
## j <- floor(nppm + fuzz) # m = a + probs*(1 - a - b)
## h <- nppm - j
##
## if(any(sml <- abs(h) < fuzz)) h[sml] <- 0
##
## x <- sort(x, partial =
## unique(c(1, j[j>0L & j<=n], (j+1)[j>0L & j<n], n))
## )
## x <- c(x[1L], x[1L], x, x[n], x[n])
## ## h can be zero or one (types 1 to 3), and infinities matter
## #### qs <- (1 - h) * x[j + 2] + h * x[j + 3]
## ## also h*x might be invalid ... e.g. Dates and ordered factors
## qs <- x[j+2L]
## qs[h == 1] <- x[j+3L][h == 1]
## other <- (0 < h) & (h < 1)
## if(any(other)) qs[other] <- ((1-h)*x[j+2L] + h*x[j+3L])[other]
##
## }
## } else {
## qs <- rep(NA_real_, np)}
##
## if(is.character(lx)){
## qs <- factor(qs, levels = seq_along(lx), labels = lx, ordered = TRUE)}
## if(names && np > 0L) {
## names(qs) <- format_perc(probs)
## }
## if(na.p) { # do this more elegantly (?!)
## o.pr[p.ok] <- qs
## names(o.pr) <- rep("", length(o.pr)) # suppress <NA> names
## names(o.pr)[p.ok] <- names(qs)
## o.pr
## } else qs
## }
##
## ####################
##
## # fake data
## x<-c(1,2,2,2,3,3,3,4,4,4,4,4,5,5,5,5,5,5,5,5,5,6,6,7,99)
## y<-c(1,2,2,2,3,3,3,4,4,4,4,4,5,5,5,5,5,5,5,5,5,6,6,7,9)
## z<-c(1,2,2,2,3,3,3,4,4,4,4,4,5,5,5,5,5,5,5,5,5,6,6,7)
##
## #quantiles "of interest"
## probs<-c(0.5, 0.75, 0.95, 0.975)
##
## # a tiny bit of illustrative behavior
## quantile.default(x,probs=probs, names=F)
## quantile.default(y,probs=probs, names=F) #only difference is .975 quantile since that is driven by highest 2 observations
## quantile.default(z,probs=probs, names=F) # This shifts everything b/c now none of the quantiles fall on an observation (and of course the distribution changed...)... but
## #.75 quantile is still 5.0 b/c the observations just above and below the order statistic for that quantile are still 5. However, it got there for a different reason.
##
## #how does rescaling affect quantile estimates?
## sqrt(quantile.default(x^2, probs=probs, names=F))
## exp(quantile.default(log(x), probs=probs, names=F)) |Michael Roswell |2019-11-07 18:15:19Z |1302 | 30| 11| 0|
## | 79709|use variables in the outer function instead of global variables. This gets you the best of both approaches: you're not mutating global state, and you're not copying a big wad of data. If you have to exit early, just return the partial results.
##
## (See the "Scope" section in the R manual: http://cran.r-project.org/doc/manuals/R-intro.html#Scope) |Allen |2008-09-17 03:48:29Z |5024 | 30| 22| 6|
## | 79709|Remember your Knuth. "Premature optimization is the root of all programming evil."
##
## Try the side effect free version. See if it meets your performance goals. If it does, great, you don't have a problem in the first place; if it doesn't, then use the side effects, and make a note for the next programmer that your hand was forced. |Rob Hansen |2008-09-17 04:11:08Z |119 | 9| 19| 5|
## | 79709|It's not going to make much difference to memory use, so you might as well make the code clean.
##
## Since R has copy-on-modify for variables, modifying the global object will have the same memory implications as passing something up in return values.
##
## If you store the outputs in a database (or even in a file) you won't have the memory use issues, and the data will be incrementally available as it is created, rather than just at the end. Whether it's faster with the database depends primarily on how much memory you are using: is the reduction is garbage collection going to pay for the cost of writing to disk.
##
## There are both time and memory profilers in R, so you can see empirically what the impacts are. |Thomas |2008-09-17 03:51:30Z |0 | 60| 48| 0|
## | 79709|FYI, here's a full sample toy solution that avoids side effects:
##
## outerfunc <- function(names) {
## templist <- list()
## for (aname in names) {
## templist[[aname]] <- innerfunc(aname)
## }
## templist
## }
##
## innerfunc <- function(aname) {
## retval <- NULL
## if ("one" %in% aname) retval <- c(1)
## if ("two" %in% aname) retval <- c(1,2)
## if ("three" %in% aname) retval <- c(1,2,3)
## retval
## }
##
## names <- c("one","two","three")
##
## name_vals <- outerfunc(names)
##
## for (name in names) assign(name, name_vals[[name]]) |James Lavin |2008-09-17 19:39:37Z |0 | 39| 25| 0|
## | 79709|I'm not sure I understand the question, but I have a couple of solutions.
##
## Inside the function, create a list of the vectors and return that.
## Inside the function, create an environment and store all the vectors inside of that. Just make sure that you return the environment in case of errors.
## in R:
##
## help(environment)
##
## # You might do something like this:
##
## outer <- function(datasets) {
## # create the return environment
## ret.env <- new.env()
## for(set in dataset) {
## tmp <- inner(set)
## # check for errors however you like here. You might have inner return a list, and
## # have the list contain an error component
## assign(set, tmp, envir=ret.env)
## }
## return(ret.env)
## }
##
## #The inner function might be defined like this
##
## inner <- function(dataset) {
## # I don't know what you are doing here, but lets pretend you are reading a data file
## # that is named by dataset
## filedata <- read.table(dataset, header=T)
## return(filedata)
## }
##
##
## leif |leif |2008-09-17 03:58:26Z |2983 | 0| 0| 0|
## | 79709|Third approach: inner function returns a reference to the large array, which the next statement inside the loop then dereferences and stores wherever it's needed (ideally with a single pointer store and not by having to memcopy the entire array).
##
## This gets rid of both the side effect and the passing of large datastructures. |pjz |2008-09-17 03:49:36Z |41551 | 0| 0| 0|
## | 79709|It's tough to say definitively without knowing the language/compiler used. However, if you can simply pass a pointer/reference to the object that you're creating, then the size of the object itself has nothing to do with the speed of the function calls. Manipulating this data down the road could be a different story. |Jeffrey |2008-09-17 03:43:22Z |1827 | 0| 0| 0|
## | 77434|I use the tail function:
##
## tail(vector, n=1)
##
##
## The nice thing with tail is that it works on dataframes too, unlike the x[length(x)] idiom. |lindelof |2008-09-17 13:32:45Z |34324 | 139| 98| 31|
## | 77434|To answer this not from an aesthetical but performance-oriented point of view, I've put all of the above suggestions through a benchmark. To be precise, I've considered the suggestions
##
## x[length(x)]
## mylast(x), where mylast is a C++ function implemented through Rcpp,
## tail(x, n=1)
## dplyr::last(x)
## x[end(x)[1]]]
## rev(x)[1]
## and applied them to random vectors of various sizes (10^3, 10^4, 10^5, 10^6, and 10^7). Before we look at the numbers, I think it should be clear that anything that becomes noticeably slower with greater input size (i.e., anything that is not O(1)) is not an option. Here's the code that I used:
##
## Rcpp::cppFunction('double mylast(NumericVector x) { int n = x.size(); return x[n-1]; }')
## options(width=100)
## for (n in c(1e3,1e4,1e5,1e6,1e7)) {
## x <- runif(n);
## print(microbenchmark::microbenchmark(x[length(x)],
## mylast(x),
## tail(x, n=1),
## dplyr::last(x),
## x[end(x)[1]],
## rev(x)[1]))}
##
##
## It gives me
##
## Unit: nanoseconds
## expr min lq mean median uq max neval
## x[length(x)] 171 291.5 388.91 337.5 390.0 3233 100
## mylast(x) 1291 1832.0 2329.11 2063.0 2276.0 19053 100
## tail(x, n = 1) 7718 9589.5 11236.27 10683.0 12149.0 32711 100
## dplyr::last(x) 16341 19049.5 22080.23 21673.0 23485.5 70047 100
## x[end(x)[1]] 7688 10434.0 13288.05 11889.5 13166.5 78536 100
## rev(x)[1] 7829 8951.5 10995.59 9883.0 10890.0 45763 100
## Unit: nanoseconds
## expr min lq mean median uq max neval
## x[length(x)] 204 323.0 475.76 386.5 459.5 6029 100
## mylast(x) 1469 2102.5 2708.50 2462.0 2995.0 9723 100
## tail(x, n = 1) 7671 9504.5 12470.82 10986.5 12748.0 62320 100
## dplyr::last(x) 15703 19933.5 26352.66 22469.5 25356.5 126314 100
## x[end(x)[1]] 13766 18800.5 27137.17 21677.5 26207.5 95982 100
## rev(x)[1] 52785 58624.0 78640.93 60213.0 72778.0 851113 100
## Unit: nanoseconds
## expr min lq mean median uq max neval
## x[length(x)] 214 346.0 583.40 529.5 720.0 1512 100
## mylast(x) 1393 2126.0 4872.60 4905.5 7338.0 9806 100
## tail(x, n = 1) 8343 10384.0 19558.05 18121.0 25417.0 69608 100
## dplyr::last(x) 16065 22960.0 36671.13 37212.0 48071.5 75946 100
## x[end(x)[1]] 360176 404965.5 432528.84 424798.0 450996.0 710501 100
## rev(x)[1] 1060547 1140149.0 1189297.38 1180997.5 1225849.0 1383479 100
## Unit: nanoseconds
## expr min lq mean median uq max neval
## x[length(x)] 327 584.0 1150.75 996.5 1652.5 3974 100
## mylast(x) 2060 3128.5 7541.51 8899.0 9958.0 16175 100
## tail(x, n = 1) 10484 16936.0 30250.11 34030.0 39355.0 52689 100
## dplyr::last(x) 19133 47444.5 55280.09 61205.5 66312.5 105851 100
## x[end(x)[1]] 1110956 2298408.0 3670360.45 2334753.0 4475915.0 19235341 100
## rev(x)[1] 6536063 7969103.0 11004418.46 9973664.5 12340089.5 28447454 100
## Unit: nanoseconds
## expr min lq mean median uq max neval
## x[length(x)] 327 722.0 1644.16 1133.5 2055.5 13724 100
## mylast(x) 1962 3727.5 9578.21 9951.5 12887.5 41773 100
## tail(x, n = 1) 9829 21038.0 36623.67 43710.0 48883.0 66289 100
## dplyr::last(x) 21832 35269.0 60523.40 63726.0 75539.5 200064 100
## x[end(x)[1]] 21008128 23004594.5 37356132.43 30006737.0 47839917.0 105430564 100
## rev(x)[1] 74317382 92985054.0 108618154.55 102328667.5 112443834.0 187925942 100
##
##
## This immediately rules out anything involving rev or end since they're clearly not O(1) (and the resulting expressions are evaluated in a non-lazy fashion). tail and dplyr::last are not far from being O(1) but they're also considerably slower than mylast(x) and x[length(x)]. Since mylast(x) is slower than x[length(x)] and provides no benefits (rather, it's custom and does not handle an empty vector gracefully), I think the answer is clear: Please use x[length(x)]. |anonymous |2016-05-15 12:39:46Z |3260 | 12| 15| 15|
## | 77434|If you're looking for something as nice as Python's x[-1] notation, I think you're out of luck. The standard idiom is
##
## x[length(x)]
##
##
## but it's easy enough to write a function to do this:
##
## last <- function(x) { return( x[length(x)] ) }
##
##
## This missing feature in R annoys me too! |Gregg Lind |2008-09-17 13:27:17Z |20550 | 81| 66| 4|
## | 77434|Combining lindelof's and Gregg Lind's ideas:
##
## last <- function(x) { tail(x, n = 1) }
##
##
## Working at the prompt, I usually omit the n=, i.e. tail(x, 1).
##
## Unlike last from the pastecs package, head and tail (from utils) work not only on vectors but also on data frames etc., and also can return data "without first/last n elements", e.g.
##
## but.last <- function(x) { head(x, n = -1) }
##
##
## (Note that you have to use head for this, instead of tail.) |Florian Jenn |2008-09-30 16:28:14Z |5171 | 18| 23| 9|
## | 77434|The dplyr package includes a function last():
##
## last(mtcars$mpg)
## # [1] 21.4 |Sam Firke |2016-06-07 18:51:23Z |21092 | 102| 85| 14|
## | 77434|I just benchmarked these two approaches on data frame with 663,552 rows using the following code:
##
## system.time(
## resultsByLevel$subject <- sapply(resultsByLevel$variable, function(x) {
## s <- strsplit(x, ".", fixed=TRUE)[[1]]
## s[length(s)]
## })
## )
##
## user system elapsed
## 3.722 0.000 3.594
##
##
## and
##
## system.time(
## resultsByLevel$subject <- sapply(resultsByLevel$variable, function(x) {
## s <- strsplit(x, ".", fixed=TRUE)[[1]]
## tail(s, n=1)
## })
## )
##
## user system elapsed
## 28.174 0.000 27.662
##
##
## So, assuming you're working with vectors, accessing the length position is significantly faster. |scuerda |2014-05-13 18:20:29Z |525 | 12| 3| 2|
## | 77434|Another way is to take the first element of the reversed vector:
##
## rev(dat$vect1$vec2)[1] |James |2014-02-11 15:36:09Z |65338 | 193| 155| 3|
## | 77434|I have another method for finding the last element in a vector.
## Say the vector is a.
##
## > a<-c(1:100,555)
## > end(a) #Gives indices of last and first positions
## [1] 101 1
## > a[end(a)[1]] #Gives last element in a vector
## [1] 555
##
##
## There you go! |Akash |2015-01-16 20:35:49Z |183 | 6| 2| 0|
## | 77434|Package data.table includes last function
##
## library(data.table)
## last(c(1:10))
## # [1] 10 |Enrique Pérez Herrero |2016-06-07 18:42:49Z |3496 | 33| 32| 0|
## | 77434|Whats about
##
## > a <- c(1:100,555)
## > a[NROW(a)]
## [1] 555 |Kurt Ludikovsky |2015-09-10 19:42:18Z |672 | 20| 6| 0|
## | 77434|The xts package provides a last function:
##
## library(xts)
## a <- 1:100
## last(a)
## [1] 100 |smoff |2017-05-03 12:51:58Z |550 | 21| 5| 0|
## | 77434|As of purrr 1.0.0, pluck now accepts negative integers to index from the right:
## library(purrr)
##
## pluck(LETTERS, -1)
## "Z" |LMc |2023-02-14 21:45:20Z |10665 | 41| 29| 0|
##
##
## | QuestionId| ParentId|UserId |CommentTime |body |
## |----------:|--------:|:-------------------------|:--------------------|:-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
## | 76398196| 76398196|Leroy Tyrone |2023-06-03 22:48:20Z |Welcome to SO Rossy. Please review How to make a great R reproducible example and update your question accordingly. While the SO community are very helpful, you will increase your chances of getting an answer if your make your question easier to answer. Thanks |
## | 76397984| 76397984|Ben Bolker |2023-06-03 20:49:01Z |Can you please give us some more information, including (1) a link to the documentation of gaussian_filter(); (2) a minimal reproducible example; (3) some specific examples (with code) of what you've tried that hasn't worked? |
## | 76397984| 76397984|user2554330 |2023-06-03 21:58:02Z |Showing the output of the Python code on a simple dataset would be extremely helpful. |
## | 76397984| 76397984|JStorey |2023-06-03 22:41:07Z |Apologies, I should have included some examples. I've updated my answer. Thanks! |
## | 76397805| 76397805|Leroy Tyrone |2023-06-03 22:34:20Z |How is this substantially different from your other question? Apart from adding a third method that is? Wouldn't it be better to update your first question rather than cluttering SO with near-identical questions? Just a though. Thanks. |
## | 76397805| 76397805|stats_noob |2023-06-03 22:54:08Z |@ Leroy Tyrone: Thank you for your reply! I acknowledge that this question is similar to my previous question - in this current question, I am trying to clarify a different issue on matching matrices with data frames. In the past, members in the community have advised me not to update old questions but to ask new questions instead. If you think it might be a good idea to delete this current question, I can do this. Thanks! |
## | 76397627| 76397651|Guilherme Lopes de Campos |2023-06-03 19:20:15Z |Thank you so much TarJae, I am very gratefull |
## | 76397512| 76397558|pcantalupo |2023-06-03 18:53:08Z |I didn't realize colorbar was a drop in replacement for guide_legend. Spent all morning trying to figure this out. Thank you |
## | 76397124| 76397124|user12256545 |2023-06-03 17:27:32Z |maybe use pymol to visualize the docking, it works well with pdbqt, whoever i dont understand what you mean with 2D diagramms? |
## | 76397124| 76397124|I_O |2023-06-03 17:42:06Z |There might be a number of packages, but it's hard to tell without your domain-specific expertise. Can you add an example of how the data look like before feeding it into the particular drawing tool and what visual output is desired? That aside, you might get lucky browsing one of CRAN's task-specific package collections (in your case that would be "Omics" I guess: cran.r-project.org/web/views/Omics.html ) |
## | 76397124| 76397124|ginn |2023-06-03 18:13:00Z |The post's been updated. |
## | 76396915| 76396915|Ricardo Semião e Castro |2023-06-03 17:12:40Z |Your code works correctly, i.e. creates the models. What you don't like is that the summary function for rms models (rms:::summary.rms) doesn't print information about the intercept, is that it? If it is, I think you should re-write your question. The part about the mapping doesn't seem to matter too much, you just want a custom summary function |
## | 76396915| 76396915|bison2178 |2023-06-03 17:24:43Z |@RicardoSemiãoeCastro, yes that is correct Ricardo, the rms summary function is not printing the estimates for intercept. However if we see the induvidual model summary, foo$model the intercepts are printed, but I am not sure how to put them all together in a table. |
## | 76396811| 76396811|Mikael Jagan |2023-06-03 16:05:33Z |Are you maybe looking for ks.test? |
## | 76396811| 76396811|Ben Bolker |2023-06-03 19:23:30Z |I'm puzzled. I can't find pKolmogorov2x anywhere in the current R code base (the current version has calls to functions like C_pkolmogorov_two_exact). Can you say a little more about how you decided that this was the right call? (I do find this in some repos that link to older versions of R ...) |
## | 76396563| 76396563|I_O |2023-06-03 17:56:35Z |Replacing polygons with their centroids (to get point coordinates) and grouping/aggregating them by territorial units should be straightforward. However, these are details of implemention (in R). It might be worthwile to first inquire over at gis.stackexchange.com about how to set up the model (type, constraints, pitfalls etc.), especially if you're concerned about the extent (and perhaps comparability, province-wise) of your data. Engineering your data to fit into your R function of choice should hopefully be easy then. |
## | 76396563| 76396563|Saïd Maanan |2023-06-03 18:50:12Z |Thank you very much @I_O. I did ask in gis.stackexchange.com, but I'm still waiting for someone to answer me. |
## | 76396563| 76396563|I_O |2023-06-03 20:17:39Z |I guess I took the technical issue out of the way: {GWmodel} seems to accept only data of class sp (package {sp}). The spatial statistics involved might be the greater challenge though. |
## | 76396563| 76397875|Saïd Maanan |2023-06-03 20:36:19Z |Thank you. I will wait a little then accept your answer. |
## | 76396349| 76396349|Eva |2023-06-03 14:48:05Z |Welcome to stackoverflow. Can you please give us some toy (dummy) data where we can experiment? This would be quite helpful for us to understand and answer your question. |
## | 76396349| 76396349|r2evans |2023-06-03 14:48:10Z |"2 different subsets" can have correlation so long as they have the same length and they are tied together logically (such as: date, person, sequence of event, etc). While the logical-tying thing is an "analytical" thing (meaning without it you can still get a number, whether or not it is meaningful or just a statistical fabrication/lie), but the former is a requirement. If you look at stats:::cor.test.default, one of the first checks if if (length(x) != length(y)). |
## | 76396349| 76396349|I_O |2023-06-03 16:05:43Z |As Eva hinted: the solution might involve a bit of reshaping of your original dataframe con. Please provide this data by pasting the output of dput(con) or a sufficient subset, say, five paired observations per group for, say, two outcomes. |
## | 76396323| 76396323|I_O |2023-06-03 18:04:32Z |Welcome on SO, gagrochowski. Without an example data set it's really hard to debug the code (see: stackoverflow.com/help/how-to-ask ). You can provide sample data conveniently by pasting the output of dput(head(map, 10)). |
## | 76395825| 76395825|mhovd |2023-06-03 11:30:10Z |How are your matrices structured, e.g. are they in a list, or just in the global environment? |
## | 76395825| 76395825|s28 |2023-06-03 11:33:03Z |I added some data information using dput |
## | 76395825| 76395825|Andre Wildberg |2023-06-03 14:26:16Z |Your matrix already has columns of class "numeric", Vectorize(\(x) class(mat[,x]))(1:ncol(mat)) [1] "numeric" "numeric" "numeric" "numeric" "numeric" "numeric" "numeric" [8] "numeric" "numeric" "numeric" "numeric" |
## | 76395522| 76395522|r2evans |2023-06-03 15:00:21Z |stackoverflow.com/q/5890584/3358272, stackoverflow.com/q/11608167/3358272 |
## | 76395254| 76395254|Gwang-Jin Kim |2023-06-03 08:51:33Z |can you give some very short example vectors and what the return value should be? |
## | 76395254| 76395254|Anti |2023-06-03 09:10:52Z |@Gwang-JinKim: Of course, edited my question. |
## | 76395254| 76395254|r2evans |2023-06-03 12:38:28Z |Your text x[i] <= 0.5 and x[i] >= 0.5 is going to only match the precise 0.5. I think the code is less confusing, but please edit your text so that the logic works. |
## | 76395254| 76395254|Anti |2023-06-03 13:09:01Z |@r2evans: Wonder whether it's better now. Tbh, I don't understand what you meant with "is going to only match the precise 0.5". |
## | 76395254| 76395254|r2evans |2023-06-03 14:38:28Z |x <= 0.5 & x >= 0.5 can only be true when x == 0.5, that's what I'm saying. |
## | 76395254| 76395254|r2evans |2023-06-03 15:07:45Z |There is nothing that base R code can solve to make this problem significantly faster: whether a for loop or sapply or even a (slightly-contrived) rolling-window is going to have a level of inefficiency when dealing with larger data. Since your vector is large, I wonder if a quick cpp function would do better, so that as soon as it finds a value that meets your criteria, it can stop looking (which a vectorized solution will not do). |
## | 76395254| 76395254|Anti |2023-06-03 15:39:41Z |@r2evans: Ah, ok. But you know what I want to get? For every value in the vector I want to get the index of the next subsequent value that is >= x[i] + 0.5 and I want to get the index for the next number that's <= x[i] - 0.5. Will think about how to better phrase it. |
## | 76395254| 76395254|r2evans |2023-06-03 15:57:56Z |I understand, and coming up with a naive vectorized (memory-explosion) or a naive non-vectorized for loop (slow) is not that hard in base R. But you want both fast and efficient, which I think is going to be very difficult without compiled code. |
## | 76395254| 76395254|Anti |2023-06-03 16:12:52Z |@r2evans: How would you've done it? I'd be still very interested in the memory-expensive solution. Thus, please share it. Thanks a lot in advance. |
## | 76395254| 76395254|Gwang-Jin Kim |2023-06-03 17:30:06Z |Yes, in this case, for-loop is faster than sapply - because apply functions involve intermediate lists and lists are slower than vectors and matrices (because elements of vectors and matrices are of a single type - and thus neet not be that flexible). |
## | 76395254| 76395254|Gwang-Jin Kim |2023-06-03 17:35:22Z |since you have to check for each position anew till then end of the vector - simlple vectorization wouldn't do it. |
## | 76395254| 76395254|Gwang-Jin Kim |2023-06-03 17:36:02Z |may I ask for what you want to do this actually? what you want to do with those values afterwards? - perhaps there might be another way to do sth you think you have to do this way. |
## | 76395254| 76395254|Anti |2023-06-03 17:48:02Z |@Gwang-JinKim: Sure. I have a time series (growth rates with varying conditions). After the condition has changed, I want to identify whether the time series' values either increase or decrease by a critical threshold. Therefore, my idea was to identify the first occurences for both directional change indications (indices) and then to compare the indices itself to understand whether a change in the condition led to a positive or negative change in the time series readings. |
## | 76395098| 76395098|freyberg |2023-06-03 09:01:15Z |Filter out the terms you would like to have in your plot and then plot it yourself or modify the plots you obtain from dotplot. For putting the different ontologies into separate categories I'd suggest using facet_grid. You just need to have this information in the associated dataframe, i.e., what term belongs to which category. If you provide an example dataset, I can provide a more exact answer. |
## | 76395098| 76395098|Saleh Sarwar |2023-06-03 12:50:17Z |I have added example data to the post. |
## | 5869539| 5869675|donodarazao |2011-05-03 13:02:18Z |Thanks for the fast answer! I guess I understand the purpose of levels and labels now. Maybe any suggestions for making output humanly better readable without manually editing table names and plot legends? |
## | 5869539| 5869675|Ben Bolker |2011-05-03 14:13:31Z |I would often transform the levels right before plotting/creating labels, e.g. keep the levels as "a","b","c" while manipulating, then use levels(f) <- paste("Treatment",toupper(levels(f)),sep=" ") [or something] when plotting. Or create a parallel factor f_pretty that you carry along and use only for output ... |
## | 5869539| 5869675|donodarazao |2011-05-04 07:22:27Z |I thought about both, but both methods have disadvantages. The first might get tedious when plotting a huge number of graphs, and the second might get tedious when a lot of data aggregation is involved in scripting. But apparently there's no way to avoid that easily, so I'll go with your suggestions. :) |
## | 5869539| 5869675|Joris Meys |2016-01-03 12:56:00Z |@42- I'm not sure what you mean with "numeric values". If you mean the internal values in the factor then that's exactly what I said above. Hence the mentioning of internal numerical values. If you specify the levels argument, you give the values in the input that have to be matched to the labels argument. R keeps the labels (as the attribute levels, and there's the confusion) and stores integer codes internally. These integer codes have nothing to do with the original values, whatever type they were. I think you misunderstood me. |
## | 5869539| 5869675|IRTFM |2016-01-03 17:53:23Z |Apologies. What you write was my understanding as well, and now that I am re-reading your question, I cannot see where I thought you said differently. I'll delete my comment because it adds less than nothing. |
## | 5869539| 5869675|Lennert |2016-12-08 12:50:48Z |Maybe it's good to mention explicitly that if you want to access the factor values, this is always done by the levels (optionally set by the 'labels' argument)? This would clarify the observation of the OP that df$f=='a' does not work when the levels are modified? |
## | 5869539| 5869675|s_baldur |2018-05-02 10:38:35Z |You say 'A factor has only a level attribute, which is set by the labels argument in the factor() function'. But (please correct me if I'm wrong) that's just one way; after the factor has been created the level attribute can also be reset afterwards with levels<- or simply attr<-(x, "levels"). Meanwhile (confusingly) the levels argument in factor() only works to reorder the levels. |
## | 5869539| 5869675|Joris Meys |2018-05-02 14:06:48Z |@snoram you can do it afterwards too. The levels argument in factor() indicates the input levels (i.e. the unique values to look for in the original vector), whereas labels gives the output levels (i.e. the labels attached to the internal numeric interpretation). |
## | 5869539| 5869675|s_baldur |2018-05-02 14:29:01Z |Thanks. I guess this comment section has gone out of control... But since input levels defaults to all values found in vector in an increasing order one would specify them only to change the order or convert some (excluded value) to NA OR define values that might appear later. Or am I missing some other "benefit". |
## | 5869539| 5869675|Joris Meys |2018-05-03 08:12:21Z |@snoram ignore values you don't want for example. |
## | 5869539| 5869675|John |2019-01-25 01:41:09Z |Just a reminder that you can see the internal codes used, with as.numeric(df$f) |
## | 5869539| 30067419|Soren |2018-08-25 13:33:33Z |this is a nice package and thanks for posting about it (and writing it). It seems the sort of functionality that should be native to R factors -- nice to see a package that provides this sort of name-value pair mapping with built-in equivalency checks. |
## | 5869539| 30067419|curious lab rat |2020-11-13 05:52:28Z |d'oh! I was excited about using lfactors until I noticed that it "requires that the levels be numeric." Figures that require publication-style labels (Greek letters, italics, superscripts and all) are a good use case for a system of factors that could still include text levels (the latter could help minimize errors by making data tables more readable). |
## | 5869539| 30067419|pdb |2020-11-14 14:37:06Z |curious lab rat, levels are numeric and labels are text. Can you come up with a code example where that is an issue? |
## | 5869539| 30067419|Herman Toothrot |2021-02-04 16:37:29Z |This should totally be included in base or ggplot. |
## | 76394767| 76394767|Community |2023-06-03 07:09:00Z |Please provide enough code so others can better understand or reproduce the problem. |
## | 76394485| 76394485|IRTFM |2023-06-03 04:33:18Z |meta.stackexchange.com/questions/92074/…. Show your efforts. Define what 'start time' and 'end time' are and why diff.time cannot be used? |
## | 76394461| 76394618|stats_noob |2023-06-03 05:16:06Z |@ Dave2e: thank you so much for your answer! Will this result in identical(final_summary, final_summary1) = TRUE? |
## | 76394461| 76394618|stats_noob |2023-06-03 19:27:18Z |( I tried to use the IDENTICA() statement and it tells me FALSE) |
## | 76394461| 76394618|Dave2e |2023-06-03 19:59:14Z |Are you sure the rows of the data frame are in the same order? |
## | 76394451| 76394451|benson23 |2023-06-03 03:32:01Z |You need to use fixed = TRUE. i.e. sub(pattern = substr(mystring, 2, 40), replacement = "HELLO", mystring, fixed = T) |
## | 76394390| 76394420|Adrian |2023-06-03 03:16:54Z |Thanks, for the second solution you posted, what if I want to replace character index from 3 to 15 instead of 1 to 15? Is there a way to do this? The desired output would be 'thorange cat` (for replacing the 3rd to 15th character with 'orange') |
## | 76394390| 76394420|benson23 |2023-06-03 03:19:28Z |In that case you might do sub("(^.{2}).{13}", "\\1orange", string), but then I think substr would be much clearer in this case. |
## | 76394390| 76394420|Adrian |2023-06-03 03:29:13Z |Perfect, thanks. I have a follow up question (related to using sub and substr) here: stackoverflow.com/questions/76394451/… For some reason, sub is not recognizing the pattern based on substr(). If you have a chance, would appreciate you taking a look |
## | 76394250| 76394250|r2evans |2023-06-03 02:26:15Z |Welcome to SO, José Amorim! This is an interesting question. I'm voting to migrate it to Cross Validated, as that site is geared towards discussing and resolving theoretical discussions a lot more than here on SO (which is mostly about "how to program things"). While CV is more tolerant of open discussions, it really does benefit when questions have more reproducible context including sample data, code used to generate the model, and likely the console representation of the model results (whether direct or via summary, I don't know what it should be for these models). Thanks! |
## | 76393934| 76393934|Jon Spring |2023-06-02 23:14:59Z |df$d_signed = abs(df$d) * sign(df$b)? |
## | 76393934| 76393934|Sylvia Rodriguez |2023-06-02 23:19:36Z |Great! Thank you :) |
## | 76393686| 76393686|Onyambu |2023-06-03 00:20:25Z |Environment variables for a user has nothing to do with R. You need to set the variables in your pc. check unix.stackexchange.com/questions/117467/… for example on how to do it in linux, and phoenixnap.com/kb/windows-set-environment-variable on windows |
## | 76393686| 76393686|Konrad Rudolph |2023-06-03 10:05:11Z |@Onyambu Except that these solutions categorically do not work for RStudio (or any other GUI applications) on macOS. |
## | 76393686| 76393686|Onyambu |2023-06-03 16:33:39Z |@KonradRudolph I specified that you do not necessarily need to use R. Setting environment variable has nothing to do with R. I am not sure about macOs. As far as I know, in windows, one cam simply use setx command, or just search for environment variables from the search button and set them the way they want |
## | 76393686| 76393686|Konrad Rudolph |2023-06-03 16:38:48Z |@Onyambu But as my answer shows there is no good non-R way to do this on macOS. You need R (specifically, .Renviron). So the question, for better or for worse, does have to do with R. |
## | 76393636| 76393636|Baraliuh |2023-06-02 22:05:27Z |Stack overflow deals explicitly with coding related questions. For advice on statistics/modeling/etc you could try cross validate: stats.stackexchange.com |
## | 76393636| 76393636|jdobres |2023-06-02 22:06:47Z |It sounds like your LASSO model might be overfitting your training data. Your train/test split might also have introduced a strange bias just by chance. Try re-splitting the data. |
## | 76393636| 76393878|Community |2023-06-03 03:47:50Z |Your answer could be improved with additional supporting information. Please edit to add further details, such as citations or documentation, so that others can confirm that your answer is correct. You can find more information on how to write good answers in the help center. |
## | 76393577| 76393577|r2evans |2023-06-02 21:34:54Z |Please make this question reproducible. This includes sample unambiguous data (e.g., data.frame(x=...,y=...) or the output from dput(head(x)) into a code block) and intended output given that input. Refs: stackoverflow.com/q/5963269, minimal reproducible example, and stackoverflow.com/tags/r/info. |
## | 76393401| 76393401|Peter |2023-06-02 21:07:01Z |Please make your question reproducible: include the output of dput(R_test_8) in the question. |
## | 76393401| 76393401|Jon Spring |2023-06-02 21:20:42Z |Mississppi2 <- Mississppi |> mutate(Type = fct_rev(Type)) and then use Mississppi2. That will make the Type variable be a factor that is in the opposite order of its current default alphabetical order. |
## | 76393401| 76393401|r2evans |2023-06-02 21:27:51Z |just about (or perhaps "all"?) questions on SO that ask about "order of axis", "order of plots", or "order of legend" are resolved with the use of factor and using its levels= argument |
## | 76393401| 76393401|r2evans |2023-06-02 21:28:07Z |see ggplot2/axis-order: stackoverflow.com/q/3253641/3358272, stackoverflow.com/q/12774210/3358272, stackoverflow.com/q/18401931/3358272; ordering with groups stackoverflow.com/q/44350031/3358272 |
## | 76393401| 76393401|r2evans |2023-06-02 21:30:25Z |Welcome to SO, tatertot2! This question will be closed as a duplicate, please look at the links I provided and learn how to apply it to your data (which we don't have). If after you do that you still cannot figure it out, please make this question reproducible by editing it and including sample data (perhaps using dput or data.frame, see that link), and show the updated code with any warnings/errors. Once that's done, @-ping me and we'll sort out what is different or such. Thanks and good luck! |
## | 76393378| 76393378|Adrian |2023-06-03 02:23:44Z |Just wondering why this question is closed when it's asking to replace multiple occurrences of a given pattern. The "duplicate" questions are asking to replace a single occurrence only. What am I missing here? |
## | 76393244| 76393244|nrennie |2023-06-02 20:22:08Z |Can you share an example, including what you have tried so far? I would suggest looking at defining a custom class as per this answer: stackoverflow.com/questions/73236486/… |
## | 76393244| 76393244|mikeytop |2023-06-02 20:34:53Z |I have edited the original post to further explain what I have tried. |
## | 76393244| 76393244|I_O |2023-06-02 21:39:06Z |see discussion here, plz: github.com/yihui/xaringan/issues/5 (BTW, you can "black out" any slide during presentation with the B-key) |
## | 76393244| 76393244|mikeytop |2023-06-03 01:06:30Z |This discussion is only for the title slide, and not a general solution for any slide. |
## | 76393244| 76393244|mikeytop |2023-06-03 08:21:31Z |Turns out you can use class: clear to do this in the metropolis theme. This solves the problem. |
## | 76393217| 76393217|maluwalmk |2023-06-02 21:45:56Z |Can you add sample data from the .txt file so we can reproduce the results? |
## | 76393217| 76393217|I_O |2023-06-02 22:24:02Z |In the middle-to-long run, getting familiar with ggplot will save you a ton of work for such purposes. For an instant solution, you can set up a multi-figure plot option (e. g. par(mfrow = c(2, 3)) for a 2-by-3 panel plot), and proceed with your loop-approach. |
## | 76392914| 76392914|sln |2023-06-02 19:17:30Z |Why "skip" in desired output ? Are quotes unwanted ? |
## | 76392914| 76392914|Simon Harmel |2023-06-02 19:48:04Z |@sln, well, that is what originally included in the survey question. I should have mentioned that. Good catch:) |
## | 76392893| 76392893|GuedesBF |2023-06-02 19:56:54Z |cant you just use regular synthax for negative numbers (-100), and adjust the methods for displaying the values inside excel? I am not sure, but I believe whether excel shows values as () or - is more of a display option, and not how the values are encoded. |
## | 76392893| 76392893|Laura |2023-06-02 20:09:05Z |While I technically could, that wouldn't really accomplish what I want since I want the excel output to already be completely formatted and not need any manual intervention |
## | 76392893| 76392893|DJJ |2023-06-02 20:16:43Z |A way out might be to format the data.frame in R the way you want it to be, then to export it as text but I haven't tested this idea. |
## | 76392893| 76392893|DJJ |2023-06-02 20:26:22Z |@Laura, I suspect the dollar sign and the decimals are governed by the excel default format. On the R side unfortunately, I could not find any solution. I might be worth it to raise the issue on the repository here |
## | 76392840| 76392968|r2evans |2023-06-02 19:22:41Z |Your first assertion is not true. By using mapply, the OP is passing length-1 arguments to fun1. You can verify this with debug(fun1) then looking at x and y when you try the OP's code. |
## | 76392840| 76392968|DJJ |2023-06-02 19:24:14Z |@r2evans, indeed thanks for the feedback |
## | 76392784| 76392784|MrFlick |2023-06-02 19:04:01Z |With the solution from the existing question, you could do something like df %>% crossing(x = seq(-4,4,.1)) %>% mutate(y=1/(1+2.71828^(-1.7*(PseudoA*(x-PseudoB))))) %>% ggplot() + aes(x, y, color=items) + geom_line() after running library(tidyverse). If you want to only do a subset of the lines, just filter the values before the crossing() step. |
## | 76392725| 76392797|Allan Cameron |2023-06-02 19:47:49Z |Learned something new here. Great answer, deserves more upvoted. |
## | 76392725| 76392797|caveman |2023-06-02 20:16:44Z |this works really great, thanks! |
## | 76392725| 76392797|r2evans |2023-06-02 20:20:33Z |@AllanCameron ... my secret: I didn't know this before now, this is how I learn things in R/python: by trying things when somebody asks. The biggest thing I think I bring to the game is having a good idea where to look and what keywords/topics to look for. Thanks! |
## | 76392725| 76392797|Allan Cameron |2023-06-02 21:20:43Z |@r2evans - that's my secret too ;) |
## | 76392709| 76392709|Jon Spring |2023-06-02 19:31:49Z |No, there is not any built-in way to make a legend for a facet label. But there ways to add a caption, or add annotations, or an inset plot area. Can you draw or describe what you're looking for? |
## | 76392709| 76392709|Jon Spring |2023-06-02 19:36:26Z |Keep in mind you can also add line breaks, either with the text \n or with functions like stringr::str_wrap(), either to your y axis labels and/or your facet labels. |
## | 76392709| 76392709|Linton |2023-06-02 20:42:53Z |I added an example of what I'd like the legend to be above, but basically I just want to include a key for the pathway acronyms I am using (for example, explain what 3-H BC is). |
## | 76392709| 76392709|Jon Spring |2023-06-02 21:26:11Z |I think these SO questions might help. It sounds like the main difference is where you want the text and how big it is, which might require some tweaking to the theme to leave enough room. stackoverflow.com/questions/12409960/… and stackoverflow.com/questions/54776142/add-text-outside-plot-area |
## | 76392709| 76392709|Jon Spring |2023-06-02 21:29:16Z |Another approach would be to create a blank plot that is just your legend and inset that like here: patchwork.data-imaginist.com/reference/inset_element.html |
## | 76392610| 76392624|Sara Haas |2023-06-02 18:22:43Z |What is the (?!) |
## | 76392610| 76392624|Sara Haas |2023-06-02 18:27:59Z |Nothing is changing still - im getting back the exact same data frame |
## | 76392610| 76392624|TarJae |2023-06-02 18:30:12Z |have you loaded the packages dplyer and stringr? |
## | 76392410| 76392410|Kat |2023-06-02 20:41:56Z |If you just want to keep the first genre you could use grepl or tidyverse::separate for example: separate(df1, col = what, into = "what2", sep = ",", extra = "drop") where df1 is the data frame, col = what: what is the name of the column you want to modify, into = "what2": what2 is the name of the new column. This will drop the original column, if you want to keep the original column, add remove = F. |
## | 76392409| 76392409|asd-tm |2023-06-02 18:09:21Z |Please provide sample data for instance with dput() |
## | 76392409| 76392409|Billy |2023-06-02 19:26:15Z |@asd-tm I uploaded the dataset |
## | 76392409| 76392409|I_O |2023-06-02 23:07:37Z |A quick solution would be to create (a) your faceted plot, (b) an unfaceted plot and stack (or otherwise arrange) both with package {patchwork} or {cowplot}. See e.g. here: ggplot2-book.org/arranging-plots.html |
## | 76392409| 76392409|Billy |2023-06-03 14:46:43Z |@I_O I didn't find something helpful. Also, I edited my post in order to make more clear what I want to achieve. Can you provide some in order to fix my problem? |
## | 76392409| 76392409|I_O |2023-06-03 15:37:42Z |Well, {patchwork} did what you asked for, didn't it? The ocumentation also shows how to adjust the relative heights of stacked plots. Anyhow, see answer with ggplot-only solution, plz. |
## | 76392409| 76396754|Billy |2023-06-03 16:32:50Z |I made the appropriate changes and it worked. Thank you so much! |
## | 76392367| 76392367|br00t |2023-06-02 18:58:14Z |If I'm not mistaken you can call write_sheet() with an empty data.frame and that will create a sheet with one empty row you can then subsequently call sheet_append() with your actual data. |
## | 76392347| 76392347|Ben Bolker |2023-06-02 18:11:52Z |replicate the repeated variables (e.g. using zoo::na.locf or tidyr::fill). The repeated variables are not a problem. |
## | 76392347| 76392347|Phoenix Byrne |2023-06-03 11:40:41Z |Thank you for the prompt response. So the ANCOVA computation won't be influenced by the fact age, for example, is duplicated for each participants? it will somehow know this is the case or do I need to add another argument to my code to account for it? Many thanks! |
## | 76392228| 76392228|user20650 |2023-06-02 17:19:32Z |What does str(data$cc) return please? |
## | 76392228| 76392228|Svenja |2023-06-02 17:22:48Z |Hi! This returns Factor w/ 4 Levels: "1", "2", "3", "4". Otherwise the variables are all continuous. |
## | 76392228| 76392228|user20650 |2023-06-02 17:27:33Z |Thanks. Nothing unexpected here ... this is how characters / factors (categorical) terms are represented in a regression model. I'll try to find a link that explains. |
## | 76392228| 76392228|user20650 |2023-06-02 17:33:57Z |perhaps stackoverflow.com/questions/36555639/… ; stackoverflow.com/questions/15231837/… |
## | 76392228| 76392228|user20650 |2023-06-02 17:37:32Z |and maybe useful stats.stackexchange.com/questions/149621/… ; stats.stackexchange.com/questions/274748/… ; |
## | 76392228| 76392228|qdread |2023-06-02 17:42:07Z |Yes, because cc has four discrete levels, you need three parameters to estimate the effect of cc on math. It seems like cc=4 is your reference level, so cc1 can be interpreted as the difference in individual math achievement between climate 1 and climate 4. After accounting for the other predictors, climate 1 has an expected value of individual math achievement -0.43 less than climate 4. The same logic applies to the interaction term. |
## | 76392228| 76392228|Svenja |2023-06-02 17:44:51Z |Alright! Got it, that solved a problem I had been thinking about for days now (although it seems to be rather a statistical then an r issue). Thank you so much!! |
## | 76392177| 76392233|Sou |2023-06-03 02:42:47Z |Thank you for the answers, in the first answer, how do i group them if the values are higher in number? the groups are much larger than three 100-s or three 200-s. I cannot manually type them out. Please help |
## | 76392177| 76392233|TarJae |2023-06-03 04:27:07Z |First: if you have 100, 100, 100 you can do rep(100, 3). Then the data in the first answer is already grouped by group. |
## | 76392149| 76392149|Reinderien |2023-06-02 22:29:38Z |It's odd that the team has five players plus a captain. Shouldn't it have four players plus a captain? |
## | 76392055| 76392055|joshbrows |2023-06-02 16:56:17Z |Try str_match_all(string, r'(pat1[\s\S]+?pat2)'). |
## | 76392055| 76392055|TarJae |2023-06-02 16:59:58Z |library(stringr) for (match in str_extract_all(string, "(?s)pat1.*?pat2")[[1]]) { print(match) }. By the way this is not a duplicate question of the referred link! Will give [1] "pat1 hello333\n pat2" [1] "pat1 again pat2" |
## | 76392055| 76392055|Onyambu |2023-06-02 17:22:26Z |Include lazy ? in your regex str_extract_all(string, "(?s)pat1.+?pat2") in your case: str_extract_all(string, regex("pat1.+?pat2", dotall = TRUE)) |
## | 76391960| 76392049|dicegods |2023-06-02 19:07:15Z |Appreciate the quick response. I am still very much a newbie at R. Coming from a SAS background, if I wanted to refer to a variable called "var", I could use &var., and that would work for 99+% of any use case that I would have for it. Is there any sort of equivalence here? Neither of the codes below work. ``` final2 = subset(final2, select=-c(paid_totmbr, membsort, {{membID}})) final2 = subset(final2, select=-c(paid_totmbr, membsort, !!sym(membID))) ``` |
## | 76391856| 76391856|joran |2023-06-02 16:20:46Z |This bit: Mydata <- read_csv(Mydata,...) is confusing. The first argument to read_csv() should be a file path, eg "folder/filename.csv". Mydata sounds like the name of a data frame, indeed you're assigning the result to the same name. Are you sure you gave read_csv() a file path? |
## | 76391856| 76391856|Gato |2023-06-02 19:02:20Z |Omg I was dum. That was the issue. Thanks |
## | 76391852| 76391852|Jon Spring |2023-06-02 16:17:56Z |It is good practice to include some sample data in your question, to reduce ambiguity and to save each potential answerer from needing to reverse engineer their own. |
## | 76391852| 76391852|Jon Spring |2023-06-02 16:23:03Z |For instance, would it be adequate for us all to use set.seed(42); my_vector <- rnorm(200, 0.0012, 0.000003))? I'm not sure that demonstrates the binning effect you are looking to recreate in ggplot2. |
## | 76391852| 76391852|roma |2023-06-02 16:27:31Z |I don't know what the theoretical distribution of "A" is, but I am now looking for a way to save my vector so that others can easily create it themselves. If you happen to know, please let me know. |
## | 76391852| 76391852|joran |2023-06-02 16:31:14Z |I believe stat_bin() accepts an explicit breaks argument, so you could do stat_bin(geom = 'hist',breaks = <breaks vector from hist() output>), perhaps? |
## | 76391852| 76391852|joran |2023-06-02 16:36:12Z |Gah, I think that should be geom = 'bar' in my previous comment. |
## | 76391852| 76391852|roma |2023-06-02 16:44:10Z |It's work, thx! |
## | 76391830| 76391830|nrennie |2023-06-02 17:05:43Z |Can you please share the code (and ideally data using dput()) that you used to create these outputs? It makes it much easier for other to help if we can recreate your results. |
## | 76391830| 76391830|Ajna F Kertesz |2023-06-03 19:20:53Z |interact_model<- glmer(Conformity_pref ~ Smart*Behave*Grades + (1 | PID) , data = MV_final, family=binomial(link = "logit")) summary(interact_model) All values are binary (1,0), except for PID (0-1295) dput(head(MV_final[, c("Conformity_pref", "PID", "Smart", "Behave", "Grades")])) structure(list(Conformity_pref = c(1, 1, 1, 1, 1, 1), PID = c("10", "10", "10", "10", "10", "10"), Smart = c(1L, 1L, 1L, 0L, 0L, 0L), Behave = c(0L, 0L, 0L, 1L, 1L, 1L), Grades = c(0L, 0L, 0L, 0L, 0L, 0L)), row.names = c(NA, -6L), class = c("tbl_df", "tbl", "data.frame")) |
## | 76391751| 76391751|joshbrows |2023-06-02 16:43:58Z |It looks like you have an extra c() when defining df_100 and df_full. It should be something like df_100 = data.frame(siteid = seq(1, 5, 1), conflu = c(3, 2, 4, 5, 6), diflu = c(9, 2, 30, 2, 5)) and df_full = data.frame(siteid = seq(1, 10, 2), conflu = c(6, 3, 5, 2, 3), diflu = c(5, 9, 2, 30, 7)) |
## | 76391482| 76391482|Mike |2023-06-02 15:25:14Z |you might have to make two plots one for functions and one for gradient and then put the title directly above the gradients and then use cowplot, patchwork or gridextra to stitch them together |
## | 76391482| 76391482|Yacine Hajji |2023-06-02 15:34:56Z |How about you create 2 columns: 1 with either nabla g or g values, the second one with 1:5. Then you assign your values to these 2 columns and you do a multi-panel facet_wrap zevross.com/blog/2019/04/02/… |
## | 76391151| 76391228|GaB |2023-06-02 15:35:31Z |you know what? I have tried this actually! :)) But I believe since my environment wasn't clean, I could not get it. Thanks |
## | 76391100| 76391100|akrun |2023-06-02 14:35:51Z |If you have a numeric column in Percent, use cumsum i.e. cumsum(c(17.54, 21.05, 20.18, 22.81, 18.42))# [1] 17.54 38.59 58.77 81.58 100.00 |
## | 76391100| 76391100|MKN17 |2023-06-02 14:38:53Z |That worked! I had also tried cumsum() but was doing it with a mutate and it wasn't returning the correct figures. Thanks again! |
## | 76391100| 76391100|akrun |2023-06-02 14:39:49Z |You may need to update the original object i.e. df1 <- df1 %>% mutate(Total_Percent = cumsum(Percent)) |
## | 76391100| 76391100|MKN17 |2023-06-02 14:47:42Z |That's what I was originally doing. But the result was that it created a new collar (Total_Percent) where the values where just the values from the Percent col even though cumsum(Percent) was being used. |
## | 76391093| 76391093|I_O |2023-06-02 15:02:38Z |Welcome on Stack Overflow, Cato. Your question sounds more about the choice of (statistical) method and workflow than their specific implementation in R language. It might be more efficient to first research over at: stats.stackexchange.com and, in case, come back for technical details . |
## | 76391093| 76391093|Yacine Hajji |2023-06-02 15:40:18Z |Are the 6 treatments 6 different treaments or the same treatment with 6 different doses? |
## | 617683| 617683|Justin |2009-03-11 01:32:31Z |Thanks! "Please enter at least 10 characters." is a dumb requirement. |
## | 617683| 617683|Harsha M V |2010-07-22 17:59:31Z |can i add this to app model instead ? |
## | 76390798| 76390798|jpsmith |2023-06-02 13:59:18Z |one of those columns likely isn't in your dataset wide_data. If you edit your question to provide your dataset structure (using dput(head(wide_data))) we could provide more specific assistance |
## | 76390798| 76390798|MrFlick |2023-06-02 14:10:46Z |It's easier to help you if you include a simple reproducible example with sample input that can be used to test and verify possible solutions. R rarely lies to you so it must think those columns have different names. What does names(wide_data) return? |
## | 76390478| 76390478|ZheFrench |2023-06-02 13:18:16Z |dmonths(x)%>% as.numeric('days') did the trick |
## | 76390426| 76390426|Rui Barradas |2023-06-02 13:42:43Z |1) You copied the function from this SO post but changed it, use the original and everything will be alright; 2) When you run A + scale_y_continuous(label=scientific_10) you don't assign the result back to A so in the last line the y-axis labels haven't changed, they are still not in scientific notation. |
## | 76390426| 76390426|Rui Barradas |2023-06-02 13:43:24Z |3) I am closing as duplicate. It is also a simple typo. |
## | 76390426| 76390426|Miquel |2023-06-02 14:26:51Z |Yes, I copied the function from a post, but in my case, the "x" is a empty square and I don't know how to fix it... |
## | 76390426| 76390426|Rui Barradas |2023-06-02 17:34:44Z |OK, the duplicate is wrong. is is a fonts or graphics device problem. Either have to install missing fonts or change graphics backend. Reopened. |
## | 76390374| 76390374|br00t |2023-06-02 13:11:38Z |Please read this article: stackoverflow.com/questions/5963269/… |
## | 76390374| 76390374|br00t |2023-06-02 13:56:17Z |The error returned by the api is quite explicit: "NID AUTH Result Invalid (1000) : Authentication failed. (ì¸ì¦ì ì¤í¨íìµëë¤.)" it appears you are not using valid credentials to access the API |
## | 76390329| 76390329|zx8754 |2023-06-02 12:59:06Z |Do you meant to increase the height of those density curves to cover the whole axis? Maybe you need to scale it before plotting. |
## | 76390329| 76390329|M. Beausoleil |2023-06-02 13:18:33Z |Sorry for not being clear, no, I just want to 'raise' the density plot so that they don't 'sit' on x = 0 or y = 0, but that I could make the density plots on x = 13 or y = 20. Is that clearer? |
## | 76390329| 76390329|Peter |2023-06-02 14:03:27Z |Have you seen the answers to this question: none directly address the requirement to place density plots within the plotting area but the packages listed may help: stackoverflow.com/questions/8545035/… |
## | 76390329| 76390329|M. Beausoleil |2023-06-02 14:17:44Z |I did see those. The problem is that when I want to customize a bit more, there are limitations. It was easier for me to find how to just move the density plot in a single ggplot graph. |
## | 76390329| 76390859|zx8754 |2023-06-02 20:47:23Z |We can replace x = 12 and y = 20 with ceiling(max(df$x)) and ceiling(max(df$y)) to make it more automated. |
## | 76390196| 76390196|br00t |2023-06-02 13:46:23Z |If you do not want to troubleshoot a reinvented wheel you can try an out-of-the-box method like the zoo::rollapply() function to acheive the same objective |
## | 76389710| 76390565|user18791303 |2023-06-02 14:43:34Z |thanks, this was already very helpful to get a general idea. I tried to run your raster algebra, but I get the error message [sds] extents do not match (). It is possible that not all raster cells overlap exactly, due to the conversion of the texture and landuse layers from shape files... |
## | 76389710| 76390565|I_O |2023-06-02 14:54:24Z |Could be a few reasons ( google.com/search?q=R+terra+match+raster+extents ). Note that you can also use a single existing "master" raster layer (e. g. carbon?) as a joint rasterizing template: rasterize(vect(your_shapefile.shp), your_carbon_raster). |
## | 76389697| 76389697|zachi |2023-06-02 11:23:24Z |as you can see from the output , I have white spaces and sentences that are divided to two lines, I would like to have it in a vector or a list , that each element will be a sentence |
## | 76389697| 76389697|Peter |2023-06-02 11:35:47Z |Please include the output of dput(heraclitus_sentences) in the question to make it reproducible. |
## | 76389613| 76389613|ThomasIsCoding |2023-06-02 11:11:19Z |better provide the data for matrix1 |
## | 76389613| 76389613|r2evans |2023-06-02 11:30:26Z |(1) You say "append" but matrix2[j,] <- is assignment. If you want to append rows, rbind seems like what you need. (2) I'm inferring that you want to append the row once, but your matrix1[i,] is being added j-times (number of rows in matrix2). In addition to adding the sample data that ThomasIsCoding requested, I think you should also include your expected output given that sample data. We don't need big matrices, perhaps just 5-10 rows (some matches, some not) and 2-3 columns. When using dput, please give us all of it, not just a portion, perhaps dput(matrix1[1:10,1:3]). |
## | 76389510| 76389510|stefan |2023-06-02 11:00:08Z |Try DF$`Opt-Numbers` <- ifelse(DF$Numbers >= 4, "Opt-CMM", "Opt-MM")) |
## | 76389510| 76389510|Usman YousafZai |2023-06-02 11:11:53Z |@stefan It works. Kindly can you add your answer ? thanks |
## | 76389386| 76389476|Sanna |2023-06-02 12:34:53Z |Thank you so much! This worked perfectly. I tried adding unique but I did not manage to figure out that there needed to be unique(.x) - can you explain to me what the .x is referring to? Thank you! |
## | 76389386| 76389476|stefan |2023-06-02 12:49:01Z |Hi Sanna. .x works the same way as . which you used, i.e. you can use one or the other. Personally I prefer the .x. And both options are special symbols to refer to the function argument when using lambda functions ie. when doing ~ f(.x) instead of function(x) f(x). |
## | 76389243| 76389243|Jetchisel |2023-06-02 10:26:52Z |Use "${10}" or "$@" ? but the question is to vague though |
## | 76389243| 76389243|guillaume |2023-06-02 10:47:27Z |sorry new here and don't really know how to put it better, my script already use 9 argument so it work, but when i had a 10th argument i have an error (on my terminal) meaning it can't detect the 10th one (i tried the same command with 9th argument and it work) so the question is more how can i use a 10th argument on my script ? it seems commandArgs only let me use 9 argument max. |
## | 76389243| 76389243|Jetchisel |2023-06-02 10:59:51Z |Show some code, and paste your script at shellcheck.net |
## | 76389243| 76389243|r2evans |2023-06-02 11:39:03Z |Welcome to SO, guillaume! I've heard of 9 being a (soft) limit for windows cli utils, but there are ways around it (stackoverflow.com/a/38390433/3358272). I don't know of any limit imposed by commandArgs. It would be very helpful if you make your question more reproducible, namely a minimal script attempt, how you are calling it (including the shell type, i.e., cmd or bash), and all errors/warnings. Please see stackoverflow.com/q/5963269 , minimal reproducible example, and stackoverflow.com/tags/r/info. Thank you! |
## | 76389243| 76389243|guillaume |2023-06-02 12:42:30Z |thanks you ll do it and try make a little model to show what i'm talking about. |
## | 76389243| 76390318|Dirk Eddelbuettel |2023-06-02 12:54:16Z |The quesstion was about an R program, not a shell script. |
## | 76389243| 76390318|chandra |2023-06-02 13:20:13Z |Point taken @DirkEddelbuettel. The question is not very clear (to me at least). It does mention my bash script and I answered just in case it helps guillaume. |
## | 76389050| 76389050|NicChr |2023-06-02 11:00:57Z |If ID R009 doesn't get assigned the year 2013, should ID R007 get assigned the year 2008 as they have records only for the last 3 months of 2008? |
## | 76389050| 76389050|Albert |2023-06-02 11:54:09Z |Sorry @NicChr, I made a typo in the first two rows. The startdate should be 2008-01-01 and not 2008-10-01. I corrected the mistake in th above question. Sorry for the inconvenience. |
## | 76389050| 76389929|Albert |2023-06-02 12:06:32Z |Thank you for your answer. I indeed do not want to have duplicates, but your solution does not solve my problem entirely. As inidicated in the question, I would like to have the month June to be the leading indicator for the statenumber (or address). So this would mean that individual R008 should only have the statenumber 3 and individual R009 only the statenumber 4 (as both live in june of a particular year at that address). |
## | 76389050| 76389929|r2evans |2023-06-02 12:15:15Z |See my edit @Albert, hope that works |
## | 76389028| 76389028|teunbrand |2023-06-02 10:01:59Z |I don't think there is a ggploty way of doing this. I've been working on something like this in a ggplot2 extension, but isn't in a useable state at the moment. |
## | 76389028| 76389028|Lenman55 |2023-06-02 10:04:32Z |Thank you for your answer. If i may ask, how would you proceed in my case? |
## | 76389028| 76389028|teunbrand |2023-06-02 10:33:27Z |I'd probably use facets with zero panel spacing to imitate the outcome, using the strip label as the header. |
## | 76388858| 76388858|Peter |2023-06-02 11:55:10Z |You can control legend title size with plg = list(title = "Score", title.cex = 1.5) for example, not sure how to change legend title angle and position. |
## | 76388339| 76388339|Maël |2023-06-02 08:23:56Z |Note that your data is not reproducible because it uses internal pointer. Check here for more: stackoverflow.com/questions/25533332/… |
## | 76388032| 76388032|AdroMine |2023-06-02 07:35:14Z |this is called converting data to wide-form, You can look at the function pivot_wider from tidyr package. tidyr.tidyverse.org/reference/pivot_wider.html |
## | 76388032| 76388359|fatemeh |2023-06-02 13:46:27Z |Thank you so much for your guidance. I tried the code that you have suggested but I got this error : Error in pivot_wider(): ! Can't convert fill` <double> to <list>` could you help me to resolve this error please? |
## | 76388032| 76388359|Jorge A |2023-06-02 14:41:18Z |@fatemeh it seems that your column is numeric, you can convert the mutation column first to a character type before using pivot wider. Like so dataframe1$mutation <- as.character(dataframe1$mutation) |
## | 76388032| 76388359|fatemeh |2023-06-02 18:48:23Z |dear @Jorge A : thanks for your response and help :) Actually all columns in my data frame are in character class. |
## | 76388032| 76388359|fatemeh |2023-06-02 19:16:47Z |As you said in order to clarify my issue I edit question box with give an example. I will be grateful if you take a look at it. |
## | 76388032| 76393047|Onyambu |2023-06-02 19:44:18Z |Please if you downvote, you should consider leaving a comment so one can tell what the problem is |
## | 76388032| 76393047|fatemeh |2023-06-03 09:28:42Z |Thank you so much for your answer. It was useful and awesome :) |
## | 76388032| 76393047|Onyambu |2023-06-03 16:29:07Z |@fatemeh consider accepting it as the solution. |
## | 76387964| 76387964|Rob |2023-06-02 07:31:54Z |DO NOT post images of code, data, error messages, etc. - copy or type the text into the question. How to Ask |
## | 76387937| 76387937|benson23 |2023-06-02 07:21:33Z |Just include date in the group_by in @Maël's original answer. i.e. ... %>% group_by(gp = sub('.', '', sub), date) %>% ... |
## | 76387472| 76387472|Onyambu |2023-06-02 06:10:14Z |try to use an example that contains 99999. The example you gave does not contain the number in question |
## | 76387472| 76387472|Taren Sanders |2023-06-02 06:12:37Z |Your MRE runs fine for me. Is it possible that the MRE doesn't include the problem column? |
## | 76387472| 76387472|HJ WHY |2023-06-02 07:35:24Z |@Onyambu I will try to be more meticulous in uploading MRE. |
## | 76387472| 76387472|HJ WHY |2023-06-02 07:35:29Z |@Taren Sanders yes you are right. |
## | 76387472| 76387866|HJ WHY |2023-06-02 07:33:58Z |> grep("name", colnames(as)) [1] 7058 and > grep("value", colnames(as)) [1] 7059 . > sum(grepl("value", colnames(as))) [1] 1 and > sum(grepl("name", colnames(as))) [1] 1 |
## | 76387472| 76387866|HJ WHY |2023-06-02 07:34:35Z |There were 2 problematic columns. Thank you. |
## | 76387441| 76390023|Nader Mehri |2023-06-02 17:09:35Z |Thanks for your helpful clarification! In your suggested solution, we set the values for Petal.Length to be 1, 4, and 6. If this is correct, I wonder what values are set for Petal.Width? Also, I wonder how the below-suggested code is different than yours: Petal: predicted_probs <- predict(model, newdata = datagrid(Petal.Length = c(1, 4, 6)), type = "probs") |
## | 76387441| 76390023|Vincent |2023-06-02 19:00:29Z |datagrid sets other variables to their means, but that can be customized. See the documentation. In this case, there should be no difference between the two, except that predictions() and avg_predictions() return uncertainy estimates. There are a ton of examples and detailed tutorials in the Predictions vignette here: vincentarelbundock.github.io/marginaleffects/articles/… |
## | 76387295| 76387295|jay.sf |2023-06-02 05:18:12Z |Did you forget to "attach" the error? |
## | 76387189| 76387189|Taren Sanders |2023-06-02 05:51:57Z |It's a bit of a guess, but in the NEWS for R 4.3, on page 20 it says: > The header âS.hâ which has been unsupported since Jan 2016 has been removed. Use âR.hâ instead. In scalop.h it still includes this reference. So I would think that downgrading to 4.2 might fix the issue, or you could fork the repo and try editing the reference (but, that might require other edits). |
## | 76387189| 76387189|Vincent Laufer |2023-06-02 12:44:55Z |Thank you Taren. This will be helpful to lots who navigate here |
## | 76387189| 76387609|Vincent Laufer |2023-06-02 11:59:42Z |WOOOWWWWWW. THANK YOU! Terrific answer. Will work through this and report back. thank you so much for your time. |
## | 76387189| 76387609|Vincent Laufer |2023-06-02 12:32:24Z |For context on differences between namespace and attach (referred to in Jared's answer), please see: stackoverflow.com/questions/14988722/… |
## | 76387189| 76390365|jared_mamrot |2023-06-02 22:09:30Z |Super frustrating problem, but I'm glad you got it sorted :) |
## | 76387189| 76390365|Vincent Laufer |2023-06-03 07:48:48Z |It actually wasn't jared, but, that's entirely thanks to you. thank you so much for your help. |
## | 76387189| 76390365|Vincent Laufer |2023-06-03 07:49:49Z |Jared - a quick observation - I wonder if the decision not to reinstall GCC will create fewer failures/compatiblity issues down the road (like what you note above). As a corollary, this experience really leaves me wondering what the differences in the installation process that are implied by devtools::install_github() contra BiocManager::install(). in other words, what parts of the solution depend on your fork, versus what parts (if any) depend on use of BiocManager() |
## | 76387115| 76387115|Sinh Nguyen |2023-06-02 04:40:55Z |Missing stat="identity" in the geom_bar function |
## | 76387115| 76387115|Jon Spring |2023-06-02 07:12:04Z |Take a look at the help for ?geom_bar: There are two types of bar charts: geom_bar() and geom_col(). geom_bar() makes the height of the bar proportional to the number of cases in each group (or if the weight aesthetic is supplied, the sum of the weights). If you want the heights of the bars to represent values in the data, use geom_col() instead. geom_bar() uses stat_count() by default: it counts the number of cases at each x position. geom_col() uses stat_identity(): it leaves the data as is. |
## | 76387115| 76387115|Jess |2023-06-02 11:36:15Z |@JonSpring - aha!!! Thank you! I tried once with geom_col() but to be honest I probably didn't pay close enough attention to the resulting error, and looking up geom_col() vs. geom_bar() was one of the rabbit holes I needed to go down but I chose others instead (poor judgment, obviously!) Thank you, I'll give that a try this morning, |
## | 76387115| 76387115|Jon Spring |2023-06-02 16:14:09Z |You will be more likely to get useful advice if you can provide a reproducible example. 1) Sample of data (as code, e.g. using the dput function), 2) what you expect/want, and 3) what you've tried. (Current question has #3 and I'm not sure I understand #2.) |
## | 76386904| 76387453|oriole1231 |2023-06-02 17:05:58Z |I am beginning to see now! Thank you! I am sorry for my little knowledge. I will need to go back in and fix my data sheet. Unfortunately, tree number is not unique. It corresponds to the location in the field where the sample was taken, so for example, Trees 1 - 2 of a type on Date 1 in Field 3 were not the same as Tree 1 - 2 of the same type on Date 2 in Field 3, etc. Also, if I wanted to give the average number of moths based on all sampled trees, would I need to make a sum column like the avg you created, and somehow calculate the overall average from all trees? Thank you! |
## | 76386904| 76387453|neilfws |2023-06-03 04:42:36Z |I suggest you just experiment with and/or read about how group_by works, because that is the key thing here - you group on whatever combination of variables gives you the desired mean value. |
## | 76386904| 76387453|oriole1231 |2023-06-03 07:56:07Z |OK, I see! Thank you very much! I will do this. Thank you again for the help with this! |
## | 76386843| 76386866|rez |2023-06-02 03:06:08Z |thanks but z should be returned as an integer not TRUE and FALSE. |
## | 76386843| 76386866|thelatemail |2023-06-02 03:12:54Z |@rez - shift the parenthesis back one part - mutate(z = rowSums(pick(newX, y) == 1)) |
## | 76386843| 76386866|zephryl |2023-06-02 03:18:54Z |@rez Ah, I misread your original code. I edited my answer in line with @thelatemailâs correction. |
## | 76386840| 76386840|IRTFM |2023-06-02 03:31:46Z |Perhaps because apostrophes are not punctuation? |
## | 76386786| 76387136|joshbrows |2023-06-02 11:58:01Z |It does work, but it doesn't display anything in the tooltip. If I change it to glue::glue('Bin Size = {after_stat({count})}'), it doesn't change the tooltip. |
## | 76386732| 76386732|Onyambu |2023-06-02 02:14:51Z |highlight and press contol/command enter |
## | 76386620| 76386620|Onyambu |2023-06-02 01:48:20Z |could you fill in the values ## for site1? |
## | 76386620| 76386620|IRTFM |2023-06-02 01:48:30Z |Double octothorpes for data (or results)? You should say what the research question is and do the calculations by hand to get to the starting point for your desired result. You should also admit if this is homework. |
## | 76386429| 76386429|K J |2023-06-02 00:17:05Z |<Message>Request has expired</Message> |
## | 76386429| 76386429|K J |2023-06-02 00:24:59Z |hmm some of those links wil redirect e.g. mdba.gov.au/publications/mdba-reports/… which then results in mdba.gov.au/sites/default/files/pubs/… |
## | 76386409| 76386409|Baraliuh |2023-06-02 00:25:39Z |You have a missing comma and quote here paste0'). Possibly some missing code? |
## | 76386402| 76386402|Jon Spring |2023-06-02 00:12:12Z |I don't see anything obvious in the 1.1.2 or 1.1.1 release notes that applies here. cran.r-project.org/web/packages/dplyr/news/news.html. One note, I think it would be better practice to use FALSE and TRUE instead of F and T, as those values can be overwritten, e.g. F <- TRUE will create chaos for your code. |
## | 76386402| 76386402|Jon Spring |2023-06-02 00:15:20Z |It's also unnecessary to have a test like TEST == T when you can just use TEST, assuming it's a logical value already. |
## | 76386402| 76386402|Jon Spring |2023-06-02 00:17:56Z |There is only one case, which makes me wonder why you are using case_when; presumably mutate(SY_Flag = SY_Dup2 & lag(SY_Dup2, default = FALSE) & Location == 'Sales' & lag(Location == 'Sales') & !SY_Del & !lag(SY_Del, default = TRUE)) would do the same thing? |
## | 76386402| 76386402|Jon Spring |2023-06-02 00:19:36Z |BTW, what is the mutate error? Unexpected result or do you get error text? |
## | 76386402| 76386402|Hank |2023-06-02 00:27:09Z |@JonSpring the error is : "Error in mutate(): â¹ In argument: SY_Flag = case_when(...). â¹ In group 1: TagRFId = "942 000004690323". Caused by error in case_when(): ! Failed to evaluate the left-hand side of formula 1. Caused by error in vec_c(): ! Can't convert ..1 <logical> to <vctrs_unspecified>" I bypassed it by Sy_Flag = FALSE |
## | 76386402| 76386402|Hank |2023-06-02 00:31:38Z |@JonSpring Yes, I agree on practising TRUE and FLASE, Its just that I have been given a repo with lots of code and the plan is to do tweaks in time. I did change thm in this case and didn't make a difference. I am reluctant to accept its the package issue since the mutate function is being used in other parts of the code with no error. But as we are using the same repo and code and all other variables are the same except the package and RStudio versions, it made me investigate the possibility. |
## | 76386402| 76386402|Jon Spring |2023-06-02 01:37:13Z |I suspect the issue is that you have not specified what the value should be when the tests are not met. In the past, people would often end case_when() calls with a line like TRUE ~ NA_logical_) so any cases where tests aren't met outputs a logical-type NA. Now there's .default. You haven't specified what should happen then. I think the error might be related to the underlying vctrs package not knowing what type to output when TagRFId = "942 000004690323" -- perhaps that's a group where all the outputs fail the test, so the output is NA, but it doesn't know which type of NA to give. |
## | 76386402| 76386402|Hank |2023-06-02 04:00:26Z |@JonSpring hmm, so do u think the issue is with vctrs package? However, I find it puzzling that some team members can run the code without encountering any errors. The only difference seems to be that they have newer versions of R and the related packages. To test this theory, I attempted to unload the newer package and install an older version of plyr using different methods like renv and <Rtools>. Unfortunately, I haven't had any success due to the constraints imposed by our organisational policy, which prevents updating the Rtools path in the system environment. |
## | 76386402| 76386402|Hank |2023-06-02 04:07:18Z |Let us continue this discussion in chat. |
## | 76386356| 76386356|Ricardo Semião e Castro |2023-06-01 23:53:47Z |Could you post a snippet of your data? Paste the output of the function dput(df) |
## | 76386347| 76386733|Feynman |2023-06-02 03:00:01Z |Is there a way I can assign the newly produced data table to a separate variable? I'm not sure if putting the assignment operator in front of "for" is gonna work. Also, since I need to calculate log of base 2, it should be value = log(DT[j],2)+1, right? Also unsure why you have 2 brackets within log. |
## | 76386347| 76386733|zephryl |2023-06-02 03:04:18Z |I would just make a copy beforehand â see my edited answer. |
## | 76386347| 76386733|zephryl |2023-06-02 03:09:38Z |And yes, youâd use log(DT[j],2)+1. |
## | 76386347| 76386733|Feynman |2023-06-02 03:49:46Z |So I had to do a separate process to change all the 0's to "NA"'s but other than that, the code worked perfectly. Thank you! |
## | 76386335| 76386335|thelatemail |2023-06-01 23:40:18Z |unique from base or distinct from dplyr should do it. |
## | 76386335| 76386335|neilfws |2023-06-01 23:40:29Z |I think you just want df %>% dplyr::distinct(names, class). |
## | 76386256| 76386256|Jon Spring |2023-06-01 23:53:49Z |Pretty sure this isn't supported by officer, but I have seen other examples of people getting javascript running in powerpoint: stackoverflow.com/questions/67519747/… reddit.com/r/d3js/comments/qb3dr9/… |
## | 76386162| 76386162|thelatemail |2023-06-01 23:26:15Z |If your overall aim is to just keep the first > 600 value in each row, then you could do: original_data[col(original_data) > max.col(original_data > 600, "first")] <- NA |
## | 76386109| 76386109|neilfws |2023-06-01 22:35:03Z |This question is better suited for Cross Validated. You'll get better answers if you include more details (data and complete code). |
## | 76386109| 76386109|DaveArmstrong |2023-06-01 22:43:00Z |From the fixest package there is a function called feglm that will do what you want, you could do: feglm(watched_seconds ~ groupB + time | genre+ language + user, data=data, family="gaussian") - this assumes time isn't categorical, but continuous. If it is categorical, you can stick it with the other categorical controls on the right-hand side of the |. |
## | 76386104| 76386208|Mauro r |2023-06-02 08:43:12Z |That works great, thank you, @stefan! It is just an odd behaviour of the plot. Tagging @jbryer in case he might see this. |
## | 76386104| 76386208|stefan |2023-06-02 08:51:31Z |This is most likely a bug as the labels are for example placed in the "correct" order. Perhaps an issue resulting from a change in ggplot2 ... |
## | 76386018| 76386018|br00t |2023-06-01 22:11:48Z |The dot (".") symtax is meant to be used with maggritr pipes (%>%) not the R native pipe (|>). |
## | 76386018| 76386018|br00t |2023-06-01 22:18:44Z |Please add the input data prices to your post by using dput() see: stackoverflow.com/questions/5963269/… |
## | 76386018| 76386018|MNK2008- |2023-06-02 00:08:51Z |@broot I have edited to contain the first 15 rows using dput() |
## | 76385903| 76385903|Ben Bolker |2023-06-01 21:49:06Z |@TarJae, please don't answer in comments/post as an answer ... |
## | 76385903| 76385903|r2evans |2023-06-01 22:04:45Z |datatable is ambiguous, can is often for the datatable plugin for jQuery, not at all related to R. You might be referring to data.table (confusing, I know), which is for the data.table package, but your use of sample_n suggests dplyr. The tag-recommendation system on Stack is certainly imperfect, please check them when you ask questions, easily done by mousing over and reading the ALT text. If you would accept answers in either data.table or dplyr syntax, then I suggest you tag both and be explicit. Thanks! |
## | 76385903| 76385903|r2evans |2023-06-01 22:06:48Z |is axis='columns' a new option for dplyr::sample_n? I have not seen it before |
## | 76385903| 76386069|Feynman |2023-06-01 22:22:31Z |This also only returns a set of integers |
## | 76385903| 76386069|Leroy Tyrone |2023-06-01 22:28:26Z |@Feynman - updated, I forgot the with = FALSE |
## | 76385903| 76385945|Feynman |2023-06-01 21:53:21Z |This didn't return a data table, only a set of integers. Also, is there a way I can have the selection process occur at a certain column number (since the first 2 columns need to be kept in)? |
## | 76385903| 76385945|TarJae |2023-06-01 21:54:47Z |Try this randomTable <- mtcars[, sample(ncol(mtcars), 3)] -> this will select 3 columns out of mtcars dataset! |
## | 76385903| 76385945|Feynman |2023-06-01 22:00:36Z |yes, this produced a data table but it doesn't fix the issue with the table I need to use |
## | 76385903| 76385945|Feynman |2023-06-01 22:15:39Z |For the second part of the code, it returns the error "no applicable method for 'select' applied to an object of class "character"" What I wrote is: select('Name', 'Description', all_of(selected_columns)) (because the two columns I needed to keep are labeled "Name" and "Description" |
## | 76385903| 76385945|TarJae |2023-06-01 22:20:27Z |Remove the ticks: select(Name, Description, all_of(selected_columns)) |
## | 76385903| 76385945|Feynman |2023-06-01 22:21:42Z |Now it gives the error: "Error in UseMethod("select") : no applicable method for 'select' applied to an object of class "c('integer', 'numeric')"" |
## | 76385903| 76385945|TarJae |2023-06-01 22:25:31Z |Hmmh... Try dplyr::select(Name, Description, all_of(selected_columns)) |
## | 76385903| 76385945|Feynman |2023-06-01 22:28:55Z |It still gives the same error |
## | 76385903| 76385945|TarJae |2023-06-01 22:30:23Z |the please provide some sample data with dput(head(ogTable)) |
## | 76385712| 76385712|Rui Barradas |2023-06-01 21:18:11Z |I cannot reproduce the error, with me the (1st and 2nd) boxes are not skinny. |
## | 76385712| 76385712|TarJae |2023-06-01 21:44:54Z |@RuiBarradas Transform date to datetime: library(lubridate) df %>% mutate(date = ymd_hms(date)) %>% then the boxplots get skinny. |
## | 76385712| 76385712|Jon Spring |2023-06-01 22:11:33Z |boxplot width 1 will make the boxes 1 unit wide. With POSIXct in the x axis, 1 unit is 1 second, which is far too narrow for this data. Try width = 60*60*24*5 to make them 5 days wide, for instance. Tricky b/c I see some are closer than that so you'll have overlaps. |
## | 76385712| 76385712|hollyrose54 |2023-06-01 22:20:08Z |@JonSpring Oh, wow! I had no idea the width was in seconds for POSIXct. Thank you so much! I really appreciate it. |
## | 76385712| 76385864|hollyrose54 |2023-06-01 21:50:48Z |thank you for answering! So, I tried this: sylvan$date3 <- as.character(sylvan$date) library(dplyr) sylvan %>% ggplot(aes(x = date3, y = Chloride_mgL, group = date3)) + geom_boxplot(width = 2, outlier.shape = NA, color = "dark blue") + theme_minimal() + labs(y = "Chloride (mg/L)") and it made the boxes wider, but the x axis is really weird now - let me see if I can add a picture |
## | 76385712| 76385864|TarJae |2023-06-01 21:52:52Z |Try to toggle position_dogde(xx) here geom_boxplot(width = 2, outlier.shape = NA, color = "dark blue", position = position_dodge(0.5)) + |
## | 76385712| 76385864|hollyrose54 |2023-06-01 22:06:24Z |That made the boxes perfect, thank you so much! One more question though, I wanted the x axis to look like it did in my original pictures (with labels like June, July, etc) but now that I have the x axis as characters, it's displaying every single tick with the entire date/time so it looks super messy and unreadable. Any advice there? (Realizing this might be basic, but I'm still really new to R!) |
## | 76385712| 76385864|TarJae |2023-06-01 22:11:55Z |One way is to change the angle add: theme(axis.text.x = element_text(angle = 90, hjust = 1)) . You can use 45 also. Just try out. there are other ways setting breaks etc. or usning scale_x_date etc... but this should be a separate question. |
## | 255697| 255992|mercator |2008-11-01 20:42:03Z |I'm rather curious myself now. Is it the dirichlet() function in the VGAM package? |
## | 255697| 255992|Alex Coventry |2008-11-01 22:22:36Z |Thanks, this was very helpful. I think it's probably dirmul in the VGAM package. finzi.psych.upenn.edu/R/library/VGAM/html/dirmul.old.html |
## | 255697| 255992|mercator |2008-11-03 18:00:37Z |Thanks... Ooh, HTML documentation. I couldn't find that before. |
## | 127137| 127147|kaybenleroll |2008-09-24 13:34:40Z |I have done that, just realised I should mention that. :) |
## | 127137| 127147|zvrba |2008-09-24 13:38:00Z |Then you might want to look into CRAN: an online collection of R modules, with source code: cran.r-project.org |
## | 127137| 132781|Iterator |2012-02-16 03:41:20Z |It's interesting to look at older questions, and note how CRAN has grown since then. :) |
## | 127137| 133237|Dirk Eddelbuettel |2009-07-26 21:12:10Z |Re your point 3: this shows the 'parsed' R code which has all comments stripped. Good for a quick look, but the source packages may well be more informative. |
## | 127137| 133237|Gregg Lind |2009-07-26 21:59:28Z |Of course they're more informative! This method also fail for code that calls out to C programs. But it's hard to beat for speed of access, and to snoop around to find out some arcane detail. |
## | 127137| 821947|George D Girton |2014-08-17 20:34:26Z |The R GraphGallery link is dead |
## | 127137| 1185572|fgm2r |2011-08-14 16:56:46Z |The home page seems to have moved: rwiki.sciviews.org/doku.php |
## | 127137| 127176|Sampson |2009-07-23 18:21:18Z |What the heck...This blog is just a bunch of climate data. I thought I was going to actually benefit from this accepted-answer. |
## | 127137| 127176|Sampson |2009-07-23 19:09:45Z |@Boden, which makes it a pathetic 'answer.' Might as well link to Google from now on for every question. |
## | 127137| 127176|dbr |2009-07-26 07:32:01Z |I've added a few more-direct links to R related stuff, which wasn't immediately obvious to find.. |
## | 127137| 127176|nagul |2009-08-01 14:13:54Z |Probably because of this thread: meta.stackexchange.com/questions/8584/…. I don't think this answer deserves to be chosen, nor downvoted to this extent. The person who chose this answer deserves the downvotes more, imho. |
## | 127137| 127176|kaybenleroll |2009-08-08 04:17:22Z |As the guy who accepted the answer, I can honestly say there was no gaming. It seemed to be the best answer I got at the time, but it was a while ago, so I don't really remember. |
## | 127137| 127176|agentofuser |2009-12-12 14:24:54Z |The link to the scripts category redirects you to a single post ("Category Added" - climateaudit.org/2005/08/30/category-added). This link worked best for me: climateaudit.org/category/scripts |
## | 102056| 102056|aku |2008-09-19 14:06:46Z |How is that programming question? Need some google-fu? go to johnny.ihackstuff.com |
## | 102056| 102056|Saurabh Hooda |2016-12-13 11:54:44Z |hackr.io/tutorials/learn-r is a great place to find the best R programming tutorials submitted and voted by the programming community. |
## | 102056| 215504|Peter Ellis |2013-04-25 07:45:49Z |this wouldn't work these days - S-Plus is history now and I imagine most R users have never touched it. And most current R packages were developed without reference to it. |
## | 102056| 5746388|richiemorrisroe |2012-05-21 10:23:09Z |please note that the + no longer works (because of Google plus) but enclosing the string in quotations marks still does. |
## | 102056| 1257045|unutbu |2010-02-15 14:29:44Z |To search for the "R" tag on Stackoverflow, type [R] in the search field. |
## | 102056| 102610|Gregg Lind |2008-09-19 18:04:08Z |May vary? Definitely varies, since there's a ton of material not on R's official site. |
## | 95007| 1463249|Gregg Lind |2009-09-23 13:18:22Z |Thank you for actually answering my question :) That was a serious amount of detective work. |
## | 95007| 1463249|AFoglia |2009-09-23 14:13:35Z |No problem. I'm trying to write a quantile function for Python/Numpy for our group, which lead me to this question. When I eventually found the answer, I figured I'd share. |
## | 95007| 1463249|Rob Hyndman |2009-09-27 12:08:20Z |I wrote the quantile() function and the associated help file and submitted it to the R core team in August 2004 (replacing the previous versions). I've just checked and all of these errors were caused by my help file being changed after I submitted it. (I am responsible for the use of p and p[k] though.) I'd never noticed it as I assumed my file would be left untouched. I'll see if I can get the help file fixed for R 2.10.0. |
## | 95007| 1463249|Rob Hyndman |2009-10-05 09:28:07Z |@AFoglia. I've put a proposed new help file at robjhyndman.com/quantile.html. Comments before I submit to Rcore? |
## | 95007| 1463249|AFoglia |2009-10-05 15:01:19Z |The new one is much better. I have a minor suggestion to add the definitions of gamma for methods one through three, although that might not be necessary for the statistical-knowledgable R audience. Otherwise, it looks great. |
## | 95007| 1463249|Rob Hyndman |2009-10-29 09:21:53Z |Just to complete this discussion, the new help file is now part of base Rv2.10.0. |
## | 95007| 1463249|Gregg Lind |2012-04-03 14:46:04Z |Every time I look at help(quantile) in R, I am pleased with how this turned out! |
## | 95007| 58754784|Lampard |2019-11-12 04:29:58Z |The detail explain of quantile.default() source code is very useful, lovely thanks |
## | 79709| 79709|Jeff Atwood |2008-09-17 03:52:45Z |I recommend editing this post to include some brief pseudocode so it's easier to see what you're trying to do |
## | 79709| 79709|smci |2016-04-28 13:23:48Z |Yes please add pseudocode, it's impossible to know what you mean. When you say "process... and store the result", do you mean "store the exact same string", or vector (of ints, referencing words), or what? Are you storing it as a record in some huge dataframe/array/matrix? Give us some idea of number of rows, columns, file, chunk and vector sizes, and your working memory? |
## | 77434| 77434|benefactual |2008-09-16 21:44:25Z |I am by no means an R expert, but a quick google turned up this: <stat.ucl.ac.be/ISdidactique/Rhelp/library/pastecs/html/…> There appears to be a "last" function. |
## | 77434| 77434|krlmlr |2013-02-13 11:55:05Z |Related: stackoverflow.com/q/6136613/946850 |
## | 77434| 77434|EngrStudent |2014-07-23 15:48:27Z |MATLAB has the notation "myvariable(end-k)" where k is an integer less than the length of the vector that will return the (length(myvariable)-k)th element. That would be nice to have in R. |
## | 77434| 83222|kpierce8 |2009-08-12 20:25:24Z |however x[length(x[,1]),] works on dataframes or x[dim(x)[1],] |
## | 77434| 83222|hadley |2009-08-13 13:33:56Z |Note that for data frames, length(x) == ncol(x) so that's definitely wrong, and dim(x)[1] can more descriptively be written nrow(x). |
## | 77434| 83222|jbaums |2015-04-28 00:38:50Z |@hadley - kpierce8's suggestion of x[length(x[,1]),] is not wrong (note the comma in the x subset), but it's certainly awkward. |
## | 77434| 83222|anonymous |2017-02-27 15:27:44Z |Please note that my benchmark below shows this to be slower than x[length(x)] by a factor of 30 on average for larger vectors! |
## | 77434| 83222|Andreas Storvik Strauman |2018-03-19 17:47:09Z |Doesn't work if you want to add stuff from vectors though tail(vector, n=1)-tail(vector, n=2) |
## | 77434| 37238415|Kwame |2017-01-10 19:02:57Z |^ O(1) solutions should be the only acceptable answer in this question. |
## | 77434| 37238415|sam |2018-09-22 18:41:51Z |Thanks for timing all those anon +1! |
## | 77434| 37238415|Endle_Zhenbo |2019-04-18 14:59:40Z |I tried mylastR=function(x) {x[length(x)} It's faster than mylast in Rcpp, but one time slower than writing x[length(x)] directly |
## | 77434| 37238415|Ingo Pingo |2022-01-25 09:02:28Z |Even with big vectors there is no meaningful difference. Transforming to seconds shows that for the longest vector the fastest method takes 0.000001133 seconds and the slowest method takes 0.102328667 seconds (both median). Well, nobody will notice that in real life. I would choose readabilty over benchmarks here. |
## | 77434| 83162|J. Mini |2020-06-04 00:07:50Z |Do note that if you want the last few elements of a vector rather than just the last element, there's no need to do anything complex when adapting this solution. R's vectorization allows you to do neet things like get the last four elements of x by doing x[length(x)-0:3]. |
## | 77434| 153852|anonymous |2017-02-27 15:28:29Z |Please note that my benchmark below shows this to be slower than x[length(x)] by a factor of 30 on average for larger vectors! |
## | 77434| 37687126|Rich Scriven |2016-06-07 18:53:52Z |This basically boils down to x[[length(x)]] again. |
## | 77434| 37687126|Sam Firke |2016-06-07 18:58:54Z |Similar under the hood, but with this answer you don't have to write your own function last() and store that function somewhere, like several people have done above. You get the improved readability of a function, with the portability of it coming from CRAN so that someone else can run the code. |
## | 77434| 37687126|Keith Hughitt |2016-07-04 13:23:07Z |Can also write as mtcars$mpg %>% last, depending on your preference. |
## | 77434| 37687126|anonymous |2017-04-04 13:12:40Z |@RichScriven Unfortunately, it's considerably slower than x[[length(x)]], though! |
## | 77434| 23638765|mschilli |2014-07-07 16:05:40Z |Why not testing tail(strsplit(x,".",fixed=T)[[1]],1) for the 2nd case? To me the main advantage of the tail is that you can write it in one line. ;) |
## | 77434| 21706190|Felipe Gerard |2016-10-13 15:57:58Z |This will be expensive though! |
## | 77434| 21706190|anonymous |2017-02-27 15:30:47Z |Please note that this is an operation whose computational cost is linear in the length of the input; in other words, while O(n), it is not O(1). See also my benchmark below for actual numbers. |
## | 77434| 21706190|James |2017-02-27 19:07:06Z |@anonymous Unless you use an iterator |
## | 77434| 21706190|anonymous |2017-03-01 16:28:17Z |@James Right. But in that case, your code also wouldn't work, would it? If by iterator you mean what's provided by the iterators package, then (1) you cannot use [1] to access the first element and (2) while you can apply rev to an iterator, it does not behave as expected: it just treats the iterator object as a list of its members and reverses that. |
## | 77434| 37686960|Rich Scriven |2016-06-07 18:53:37Z |This basically boils down to x[[length(x)]] again. |
## | 77434| 32510333|Gregor Thomas |2015-11-19 19:57:41Z |I appreciate that NROW does what you would expect on a lot of different data types, but it's essentially the same as a[length(a)] that OP is hoping to avoid. Using OP's example of a nested vector, dat$vec1$vec2[NROW(dat$vec1$vec2)] is still pretty messy. |
## | 77434| 32510333|Franck Dernoncourt |2017-07-17 16:34:18Z |may be written as nrow |
## | 77434| 32510333|PatrickT |2018-12-16 19:43:42Z |Note: Unlike nrow, NROW treats a vector as 1-column matrix. |
# Persist each scraped data frame (questions, answers, comments) to its own CSV.
# file.path() builds the path portably — forward slashes also work on Windows —
# and factors out the directory that was previously repeated in three
# hard-coded, doubled-backslash string literals.
hw_dir <- file.path('E:', 'College', 'UC Davis', 'STA141B', 'HW4')
write.csv(final_dataframes[[1]], file.path(hw_dir, 'questions.csv'))
write.csv(final_dataframes[[2]], file.path(hw_dir, 'answers.csv'))
write.csv(final_dataframes[[3]], file.path(hw_dir, 'comments.csv'))
[1] https://stackoverflow.com/questions/32019566/r-xml-parse-for-a-web-address
[2] https://stackoverflow.com/questions/1604471/how-can-i-find-an-element-by-css-class-with-xpath
[3] https://stackoverflow.com/questions/18547410/xpath-with-multiple-contains-on-different-elements
[4] https://stackoverflow.com/questions/11455590/parse-an-xml-file-and-return-an-r-character-vector
[5] https://statisticsglobe.com/concatenate-vector-of-character-strings-in-r
[6] https://stackoverflow.com/questions/34570860/add-nas-to-make-all-list-elements-equal-length
[7] https://www.r-bloggers.com/2020/10/basic-error-handing-in-r-with-trycatch/